]> git.ipfire.org Git - thirdparty/bird.git/blame - sysdep/linux/netlink.c
BGP: Add support for SAFI 129 (VPN multicast)
[thirdparty/bird.git] / sysdep / linux / netlink.c
CommitLineData
95616c82
OZ
1/*
2 * BIRD -- Linux Netlink Interface
3 *
4 * (c) 1999--2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
a8caff32 9#include <alloca.h>
95616c82 10#include <stdio.h>
f83ce94d 11#include <unistd.h>
95616c82
OZ
12#include <fcntl.h>
13#include <sys/socket.h>
14#include <sys/uio.h>
15#include <errno.h>
16
17#undef LOCAL_DEBUG
18
19#include "nest/bird.h"
20#include "nest/route.h"
21#include "nest/protocol.h"
22#include "nest/iface.h"
4e276a89 23#include "lib/alloca.h"
7152e5ef
MM
24#include "sysdep/unix/timer.h"
25#include "sysdep/unix/unix.h"
26#include "sysdep/unix/krt.h"
95616c82
OZ
27#include "lib/socket.h"
28#include "lib/string.h"
9ddbfbdd 29#include "lib/hash.h"
95616c82
OZ
30#include "conf/conf.h"
31
32#include <asm/types.h>
33#include <linux/if.h>
d14f8c3c 34#include <linux/lwtunnel.h>
95616c82
OZ
35#include <linux/netlink.h>
36#include <linux/rtnetlink.h>
37
9ddbfbdd 38
95616c82
OZ
39#ifndef MSG_TRUNC /* Hack: Several versions of glibc miss this one :( */
40#define MSG_TRUNC 0x20
41#endif
42
a08a81c6
OZ
43#ifndef IFA_FLAGS
44#define IFA_FLAGS 8
45#endif
46
95616c82
OZ
47#ifndef IFF_LOWER_UP
48#define IFF_LOWER_UP 0x10000
49#endif
50
9ddbfbdd
MM
51#ifndef RTA_TABLE
52#define RTA_TABLE 15
53#endif
54
d14f8c3c
MM
55#ifndef RTA_VIA
56#define RTA_VIA 18
57#endif
58
59#ifndef RTA_NEWDST
60#define RTA_NEWDST 19
61#endif
62
63#ifndef RTA_ENCAP_TYPE
64#define RTA_ENCAP_TYPE 21
65#endif
66
67#ifndef RTA_ENCAP
68#define RTA_ENCAP 22
69#endif
9ddbfbdd 70
cc5b93f7 71#define krt_ecmp6(p) ((p)->af == AF_INET6)
2feaa693
OZ
72
/*
 * Structure nl_parse_state keeps the state of received route processing.
 * Ideally, we could just independently parse received Netlink messages and
 * immediately propagate received routes to the rest of BIRD, but the Linux
 * kernel represents and announces IPv6 ECMP routes not as one route with
 * multiple next hops (like RTA_MULTIPATH in IPv4 ECMP), but as a set of routes
 * with the same prefix.
 *
 * Therefore, BIRD keeps the currently processed route in the nl_parse_state
 * structure and postpones its propagation until we expect it to be final;
 * i.e., when a non-matching route is received or when the scan ends. When
 * another matching route is received, it is merged with the already processed
 * route to form an ECMP route. Note that merging is done only for IPv6
 * (merge == 1), but the postponing is done in both cases (for simplicity).
 * All IPv4 routes are just considered non-matching.
 *
 * This is ignored for asynchronous notifications (every notification is
 * handled as a separate route). It is not an issue for our routes, as we
 * ignore such notifications anyway. But importing alien IPv6 ECMP routes does
 * not work properly.
 */
93
94struct nl_parse_state
95{
96 struct linpool *pool;
97 int scan;
98 int merge;
99
100 net *net;
101 rta *attrs;
102 struct krt_proto *proto;
103 s8 new;
104 s8 krt_src;
105 u8 krt_type;
106 u8 krt_proto;
107 u32 krt_metric;
108};
109
95616c82
OZ
110/*
111 * Synchronous Netlink interface
112 */
113
114struct nl_sock
115{
116 int fd;
117 u32 seq;
118 byte *rx_buffer; /* Receive buffer */
119 struct nlmsghdr *last_hdr; /* Recently received packet */
ae80a2de 120 uint last_size;
95616c82
OZ
121};
122
123#define NL_RX_SIZE 8192
124
2feaa693
OZ
125#define NL_OP_DELETE 0
126#define NL_OP_ADD (NLM_F_CREATE|NLM_F_EXCL)
127#define NL_OP_REPLACE (NLM_F_CREATE|NLM_F_REPLACE)
128#define NL_OP_APPEND (NLM_F_CREATE|NLM_F_APPEND)
129
130static linpool *nl_linpool;
131
95616c82
OZ
132static struct nl_sock nl_scan = {.fd = -1}; /* Netlink socket for synchronous scan */
133static struct nl_sock nl_req = {.fd = -1}; /* Netlink socket for requests */
134
135static void
136nl_open_sock(struct nl_sock *nl)
137{
138 if (nl->fd < 0)
139 {
140 nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
141 if (nl->fd < 0)
142 die("Unable to open rtnetlink socket: %m");
143 nl->seq = now;
144 nl->rx_buffer = xmalloc(NL_RX_SIZE);
145 nl->last_hdr = NULL;
146 nl->last_size = 0;
147 }
148}
149
150static void
151nl_open(void)
152{
153 nl_open_sock(&nl_scan);
154 nl_open_sock(&nl_req);
155}
156
157static void
158nl_send(struct nl_sock *nl, struct nlmsghdr *nh)
159{
160 struct sockaddr_nl sa;
161
162 memset(&sa, 0, sizeof(sa));
163 sa.nl_family = AF_NETLINK;
164 nh->nlmsg_pid = 0;
165 nh->nlmsg_seq = ++(nl->seq);
166 if (sendto(nl->fd, nh, nh->nlmsg_len, 0, (struct sockaddr *)&sa, sizeof(sa)) < 0)
167 die("rtnetlink sendto: %m");
168 nl->last_hdr = NULL;
169}
170
171static void
86c3eea0 172nl_request_dump(int af, int cmd)
95616c82
OZ
173{
174 struct {
175 struct nlmsghdr nh;
176 struct rtgenmsg g;
641172c6
OZ
177 } req = {
178 .nh.nlmsg_type = cmd,
179 .nh.nlmsg_len = sizeof(req),
180 .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
181 .g.rtgen_family = af
182 };
95616c82
OZ
183 nl_send(&nl_scan, &req.nh);
184}
185
186static struct nlmsghdr *
187nl_get_reply(struct nl_sock *nl)
188{
189 for(;;)
190 {
191 if (!nl->last_hdr)
192 {
193 struct iovec iov = { nl->rx_buffer, NL_RX_SIZE };
194 struct sockaddr_nl sa;
31e9e101
ST
195 struct msghdr m = {
196 .msg_name = &sa,
197 .msg_namelen = sizeof(sa),
198 .msg_iov = &iov,
199 .msg_iovlen = 1,
200 };
95616c82
OZ
201 int x = recvmsg(nl->fd, &m, 0);
202 if (x < 0)
203 die("nl_get_reply: %m");
204 if (sa.nl_pid) /* It isn't from the kernel */
205 {
206 DBG("Non-kernel packet\n");
207 continue;
208 }
209 nl->last_size = x;
210 nl->last_hdr = (void *) nl->rx_buffer;
211 if (m.msg_flags & MSG_TRUNC)
212 bug("nl_get_reply: got truncated reply which should be impossible");
213 }
214 if (NLMSG_OK(nl->last_hdr, nl->last_size))
215 {
216 struct nlmsghdr *h = nl->last_hdr;
217 nl->last_hdr = NLMSG_NEXT(h, nl->last_size);
218 if (h->nlmsg_seq != nl->seq)
219 {
220 log(L_WARN "nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)",
221 h->nlmsg_seq, nl->seq);
222 continue;
223 }
224 return h;
225 }
226 if (nl->last_size)
227 log(L_WARN "nl_get_reply: Found packet remnant of size %d", nl->last_size);
228 nl->last_hdr = NULL;
229 }
230}
231
1123e707 232static struct tbf rl_netlink_err = TBF_DEFAULT_LOG_LIMITS;
95616c82
OZ
233
234static int
2feaa693 235nl_error(struct nlmsghdr *h, int ignore_esrch)
95616c82
OZ
236{
237 struct nlmsgerr *e;
238 int ec;
239
240 if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr)))
241 {
242 log(L_WARN "Netlink: Truncated error message received");
243 return ENOBUFS;
244 }
245 e = (struct nlmsgerr *) NLMSG_DATA(h);
246 ec = -e->error;
2feaa693 247 if (ec && !(ignore_esrch && (ec == ESRCH)))
95616c82
OZ
248 log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec));
249 return ec;
250}
251
252static struct nlmsghdr *
253nl_get_scan(void)
254{
255 struct nlmsghdr *h = nl_get_reply(&nl_scan);
256
257 if (h->nlmsg_type == NLMSG_DONE)
258 return NULL;
259 if (h->nlmsg_type == NLMSG_ERROR)
260 {
2feaa693 261 nl_error(h, 0);
95616c82
OZ
262 return NULL;
263 }
264 return h;
265}
266
267static int
2feaa693 268nl_exchange(struct nlmsghdr *pkt, int ignore_esrch)
95616c82
OZ
269{
270 struct nlmsghdr *h;
271
272 nl_send(&nl_req, pkt);
273 for(;;)
274 {
275 h = nl_get_reply(&nl_req);
276 if (h->nlmsg_type == NLMSG_ERROR)
277 break;
278 log(L_WARN "nl_exchange: Unexpected reply received");
279 }
2feaa693 280 return nl_error(h, ignore_esrch) ? -1 : 0;
95616c82
OZ
281}
282
283/*
284 * Netlink attributes
285 */
286
287static int nl_attr_len;
288
289static void *
290nl_checkin(struct nlmsghdr *h, int lsize)
291{
292 nl_attr_len = h->nlmsg_len - NLMSG_LENGTH(lsize);
293 if (nl_attr_len < 0)
294 {
295 log(L_ERR "nl_checkin: underrun by %d bytes", -nl_attr_len);
296 return NULL;
297 }
298 return NLMSG_DATA(h);
299}
300
ad276157
MM
301struct nl_want_attrs {
302 u8 defined:1;
303 u8 checksize:1;
304 u8 size;
305};
306
307
308#define BIRD_IFLA_MAX (IFLA_WIRELESS+1)
309
310static struct nl_want_attrs ifla_attr_want[BIRD_IFLA_MAX] = {
311 [IFLA_IFNAME] = { 1, 0, 0 },
312 [IFLA_MTU] = { 1, 1, sizeof(u32) },
313 [IFLA_WIRELESS] = { 1, 0, 0 },
314};
315
29a64162 316
e37d2e3e 317#define BIRD_IFA_MAX (IFA_FLAGS+1)
ad276157 318
ad276157
MM
319static struct nl_want_attrs ifa_attr_want4[BIRD_IFA_MAX] = {
320 [IFA_ADDRESS] = { 1, 1, sizeof(ip4_addr) },
321 [IFA_LOCAL] = { 1, 1, sizeof(ip4_addr) },
322 [IFA_BROADCAST] = { 1, 1, sizeof(ip4_addr) },
62e64905 323 [IFA_FLAGS] = { 1, 1, sizeof(u32) },
ad276157 324};
29a64162 325
ad276157
MM
326static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = {
327 [IFA_ADDRESS] = { 1, 1, sizeof(ip6_addr) },
328 [IFA_LOCAL] = { 1, 1, sizeof(ip6_addr) },
e37d2e3e 329 [IFA_FLAGS] = { 1, 1, sizeof(u32) },
ad276157 330};
29a64162 331
ad276157 332
d14f8c3c 333#define BIRD_RTA_MAX (RTA_ENCAP+1)
ad276157 334
4e276a89 335static struct nl_want_attrs nexthop_attr_want4[BIRD_RTA_MAX] = {
ad276157 336 [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
d14f8c3c
MM
337 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
338 [RTA_ENCAP] = { 1, 0, 0 },
339};
340
341static struct nl_want_attrs encap_mpls_want[BIRD_RTA_MAX] = {
342 [RTA_DST] = { 1, 0, 0 },
ad276157
MM
343};
344
ad276157
MM
345static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = {
346 [RTA_DST] = { 1, 1, sizeof(ip4_addr) },
347 [RTA_OIF] = { 1, 1, sizeof(u32) },
348 [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
349 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
350 [RTA_PREFSRC] = { 1, 1, sizeof(ip4_addr) },
351 [RTA_METRICS] = { 1, 0, 0 },
352 [RTA_MULTIPATH] = { 1, 0, 0 },
353 [RTA_FLOW] = { 1, 1, sizeof(u32) },
354 [RTA_TABLE] = { 1, 1, sizeof(u32) },
d14f8c3c
MM
355 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
356 [RTA_ENCAP] = { 1, 0, 0 },
ad276157 357};
29a64162 358
ad276157
MM
359static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = {
360 [RTA_DST] = { 1, 1, sizeof(ip6_addr) },
361 [RTA_IIF] = { 1, 1, sizeof(u32) },
362 [RTA_OIF] = { 1, 1, sizeof(u32) },
363 [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) },
364 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
365 [RTA_PREFSRC] = { 1, 1, sizeof(ip6_addr) },
366 [RTA_METRICS] = { 1, 0, 0 },
367 [RTA_FLOW] = { 1, 1, sizeof(u32) },
368 [RTA_TABLE] = { 1, 1, sizeof(u32) },
d14f8c3c
MM
369 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
370 [RTA_ENCAP] = { 1, 0, 0 },
371};
372
373static struct nl_want_attrs rtm_attr_want_mpls[BIRD_RTA_MAX] = {
374 [RTA_DST] = { 1, 1, sizeof(u32) },
375 [RTA_IIF] = { 1, 1, sizeof(u32) },
376 [RTA_OIF] = { 1, 1, sizeof(u32) },
377 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
378 [RTA_METRICS] = { 1, 0, 0 },
379 [RTA_FLOW] = { 1, 1, sizeof(u32) },
380 [RTA_TABLE] = { 1, 1, sizeof(u32) },
381 [RTA_VIA] = { 1, 0, 0 },
382 [RTA_NEWDST] = { 1, 0, 0 },
ad276157 383};
ad276157
MM
384
385
95616c82 386static int
ad276157 387nl_parse_attrs(struct rtattr *a, struct nl_want_attrs *want, struct rtattr **k, int ksize)
95616c82
OZ
388{
389 int max = ksize / sizeof(struct rtattr *);
390 bzero(k, ksize);
ad276157
MM
391
392 for ( ; RTA_OK(a, nl_attr_len); a = RTA_NEXT(a, nl_attr_len))
95616c82 393 {
ad276157
MM
394 if ((a->rta_type >= max) || !want[a->rta_type].defined)
395 continue;
396
397 if (want[a->rta_type].checksize && (RTA_PAYLOAD(a) != want[a->rta_type].size))
398 {
9b136840 399 log(L_ERR "nl_parse_attrs: Malformed attribute received");
ad276157
MM
400 return 0;
401 }
402
403 k[a->rta_type] = a;
95616c82 404 }
ad276157 405
95616c82
OZ
406 if (nl_attr_len)
407 {
408 log(L_ERR "nl_parse_attrs: remnant of size %d", nl_attr_len);
409 return 0;
410 }
ad276157
MM
411
412 return 1;
95616c82
OZ
413}
414
d14f8c3c
MM
415static inline u16 rta_get_u16(struct rtattr *a)
416{ return *(u16 *) RTA_DATA(a); }
417
fce764f9 418static inline u32 rta_get_u32(struct rtattr *a)
acb04cfd
OZ
419{ return *(u32 *) RTA_DATA(a); }
420
421static inline ip4_addr rta_get_ip4(struct rtattr *a)
422{ return ip4_ntoh(*(ip4_addr *) RTA_DATA(a)); }
423
424static inline ip6_addr rta_get_ip6(struct rtattr *a)
425{ return ip6_ntoh(*(ip6_addr *) RTA_DATA(a)); }
426
9b136840
MM
427static inline ip_addr rta_get_ipa(struct rtattr *a)
428{
429 if (RTA_PAYLOAD(a) == sizeof(ip4_addr))
430 return ipa_from_ip4(rta_get_ip4(a));
431 else
432 return ipa_from_ip6(rta_get_ip6(a));
433}
acb04cfd 434
d14f8c3c
MM
435static inline ip_addr rta_get_via(struct rtattr *a)
436{
437 struct rtvia *v = RTA_DATA(a);
438 switch(v->rtvia_family) {
439 case AF_INET: return ipa_from_ip4(ip4_ntoh(*(ip4_addr *) v->rtvia_addr));
440 case AF_INET6: return ipa_from_ip6(ip6_ntoh(*(ip6_addr *) v->rtvia_addr));
441 }
442 return IPA_NONE;
443}
444
445static u32 rta_mpls_stack[MPLS_MAX_LABEL_STACK];
446static inline int rta_get_mpls(struct rtattr *a, u32 *stack)
447{
448 if (RTA_PAYLOAD(a) % 4)
449 log(L_WARN "KRT: Strange length of received MPLS stack: %u", RTA_PAYLOAD(a));
450
451 return mpls_get(RTA_DATA(a), RTA_PAYLOAD(a) & ~0x3, stack);
452}
453
9fdf9d29
OZ
454struct rtattr *
455nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint dlen)
95616c82 456{
9fdf9d29
OZ
457 uint pos = NLMSG_ALIGN(h->nlmsg_len);
458 uint len = RTA_LENGTH(dlen);
95616c82
OZ
459
460 if (pos + len > bufsize)
461 bug("nl_add_attr: packet buffer overflow");
9fdf9d29
OZ
462
463 struct rtattr *a = (struct rtattr *)((char *)h + pos);
95616c82
OZ
464 a->rta_type = code;
465 a->rta_len = len;
466 h->nlmsg_len = pos + len;
9fdf9d29
OZ
467
468 if (dlen > 0)
469 memcpy(RTA_DATA(a), data, dlen);
470
471 return a;
95616c82
OZ
472}
473
d14f8c3c
MM
474static inline struct rtattr *
475nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code)
476{
477 return nl_add_attr(h, bufsize, code, NULL, 0);
478}
479
/* Finish the attribute @a: it spans from its header to the aligned end of @h */
static inline void
nl_close_attr(struct nlmsghdr *h, struct rtattr *a)
{
  char *msg_end = (char *) h + NLMSG_ALIGN(h->nlmsg_len);
  a->rta_len = msg_end - (char *) a;
}
485
486static inline void
487nl_add_attr_u16(struct nlmsghdr *h, uint bufsize, int code, u16 data)
488{
489 nl_add_attr(h, bufsize, code, &data, 2);
490}
491
95616c82 492static inline void
29a64162 493nl_add_attr_u32(struct nlmsghdr *h, uint bufsize, int code, u32 data)
95616c82
OZ
494{
495 nl_add_attr(h, bufsize, code, &data, 4);
496}
497
498static inline void
29a64162 499nl_add_attr_ip4(struct nlmsghdr *h, uint bufsize, int code, ip4_addr ip4)
95616c82 500{
29a64162
OZ
501 ip4 = ip4_hton(ip4);
502 nl_add_attr(h, bufsize, code, &ip4, sizeof(ip4));
503}
504
505static inline void
506nl_add_attr_ip6(struct nlmsghdr *h, uint bufsize, int code, ip6_addr ip6)
507{
508 ip6 = ip6_hton(ip6);
509 nl_add_attr(h, bufsize, code, &ip6, sizeof(ip6));
510}
511
512static inline void
513nl_add_attr_ipa(struct nlmsghdr *h, uint bufsize, int code, ip_addr ipa)
514{
515 if (ipa_is_ip4(ipa))
516 nl_add_attr_ip4(h, bufsize, code, ipa_to_ip4(ipa));
9b136840 517 else
29a64162 518 nl_add_attr_ip6(h, bufsize, code, ipa_to_ip6(ipa));
95616c82
OZ
519}
520
d14f8c3c
MM
521static inline void
522nl_add_attr_mpls(struct nlmsghdr *h, uint bufsize, int code, int len, u32 *stack)
9fdf9d29 523{
d14f8c3c
MM
524 char buf[len*4];
525 mpls_put(buf, len, stack);
526 nl_add_attr(h, bufsize, code, buf, len*4);
9fdf9d29 527}
95616c82
OZ
528
529static inline void
d14f8c3c 530nl_add_attr_mpls_encap(struct nlmsghdr *h, uint bufsize, int len, u32 *stack)
95616c82 531{
d14f8c3c
MM
532 nl_add_attr_u16(h, bufsize, RTA_ENCAP_TYPE, LWTUNNEL_ENCAP_MPLS);
533
534 struct rtattr *nest = nl_open_attr(h, bufsize, RTA_ENCAP);
535 nl_add_attr_mpls(h, bufsize, RTA_DST, len, stack);
536 nl_close_attr(h, nest);
537}
538
539static inline void
540nl_add_attr_via(struct nlmsghdr *h, uint bufsize, ip_addr ipa)
541{
542 struct rtattr *nest = nl_open_attr(h, bufsize, RTA_VIA);
543 struct rtvia *via = RTA_DATA(nest);
544
545 h->nlmsg_len += sizeof(*via);
546
62e64905
OZ
547 if (ipa_is_ip4(ipa))
548 {
d14f8c3c 549 via->rtvia_family = AF_INET;
62e64905
OZ
550 put_ip4(via->rtvia_addr, ipa_to_ip4(ipa));
551 h->nlmsg_len += sizeof(ip4_addr);
552 }
553 else
554 {
d14f8c3c 555 via->rtvia_family = AF_INET6;
62e64905
OZ
556 put_ip6(via->rtvia_addr, ipa_to_ip6(ipa));
557 h->nlmsg_len += sizeof(ip6_addr);
d14f8c3c
MM
558 }
559
560 nl_close_attr(h, nest);
95616c82
OZ
561}
562
9fdf9d29
OZ
563static inline struct rtnexthop *
564nl_open_nexthop(struct nlmsghdr *h, uint bufsize)
565{
566 uint pos = NLMSG_ALIGN(h->nlmsg_len);
567 uint len = RTNH_LENGTH(0);
568
569 if (pos + len > bufsize)
570 bug("nl_open_nexthop: packet buffer overflow");
571
572 h->nlmsg_len = pos + len;
573
574 return (void *)h + pos;
575}
576
/* Finish the nexthop @nh: it spans from its header to the aligned end of @h */
static inline void
nl_close_nexthop(struct nlmsghdr *h, struct rtnexthop *nh)
{
  char *msg_end = (char *) h + NLMSG_ALIGN(h->nlmsg_len);
  nh->rtnh_len = msg_end - (char *) nh;
}
95616c82 582
d14f8c3c
MM
583static inline void
584nl_add_nexthop(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af)
585{
586 if (nh->labels > 0)
587 if (af == AF_MPLS)
588 nl_add_attr_mpls(h, bufsize, RTA_NEWDST, nh->labels, nh->label);
589 else
590 nl_add_attr_mpls_encap(h, bufsize, nh->labels, nh->label);
591
592 if (ipa_nonzero(nh->gw))
593 if (af == AF_MPLS)
594 nl_add_attr_via(h, bufsize, nh->gw);
595 else
596 nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
597}
598
95616c82 599static void
d14f8c3c 600nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af)
95616c82 601{
9fdf9d29
OZ
602 struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH);
603
95616c82 604 for (; nh; nh = nh->next)
9fdf9d29
OZ
605 {
606 struct rtnexthop *rtnh = nl_open_nexthop(h, bufsize);
95616c82 607
9fdf9d29
OZ
608 rtnh->rtnh_flags = 0;
609 rtnh->rtnh_hops = nh->weight;
610 rtnh->rtnh_ifindex = nh->iface->index;
95616c82 611
d14f8c3c 612 nl_add_nexthop(h, bufsize, nh, af);
95616c82 613
9fdf9d29
OZ
614 nl_close_nexthop(h, rtnh);
615 }
616
617 nl_close_attr(h, a);
618}
95616c82 619
4e276a89 620static struct nexthop *
95616c82
OZ
621nl_parse_multipath(struct krt_proto *p, struct rtattr *ra)
622{
623 /* Temporary buffer for multicast nexthops */
4e276a89 624 static struct nexthop *nh_buffer;
95616c82
OZ
625 static int nh_buf_size; /* in number of structures */
626 static int nh_buf_used;
627
ad276157 628 struct rtattr *a[BIRD_RTA_MAX];
95616c82 629 struct rtnexthop *nh = RTA_DATA(ra);
4e276a89 630 struct nexthop *rv, *first, **last;
3e236955 631 unsigned len = RTA_PAYLOAD(ra);
95616c82
OZ
632
633 first = NULL;
634 last = &first;
635 nh_buf_used = 0;
636
637 while (len)
638 {
639 /* Use RTNH_OK(nh,len) ?? */
640 if ((len < sizeof(*nh)) || (len < nh->rtnh_len))
641 return NULL;
642
643 if (nh_buf_used == nh_buf_size)
644 {
645 nh_buf_size = nh_buf_size ? (nh_buf_size * 2) : 4;
d14f8c3c 646 nh_buffer = xrealloc(nh_buffer, nh_buf_size * NEXTHOP_MAX_SIZE);
95616c82
OZ
647 }
648 *last = rv = nh_buffer + nh_buf_used++;
649 rv->next = NULL;
650 last = &(rv->next);
651
652 rv->weight = nh->rtnh_hops;
653 rv->iface = if_find_by_index(nh->rtnh_ifindex);
654 if (!rv->iface)
655 return NULL;
656
657 /* Nonexistent RTNH_PAYLOAD ?? */
658 nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0);
4e276a89 659 nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want4, a, sizeof(a));
95616c82
OZ
660 if (a[RTA_GATEWAY])
661 {
23c212e7 662 rv->gw = rta_get_ipa(a[RTA_GATEWAY]);
95616c82 663
23c212e7
OZ
664 neighbor *nbr;
665 nbr = neigh_find2(&p->p, &rv->gw, rv->iface,
666 (nh->rtnh_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0);
667 if (!nbr || (nbr->scope == SCOPE_HOST))
95616c82
OZ
668 return NULL;
669 }
670 else
d14f8c3c 671 rv->gw = IPA_NONE;
62e64905 672
d14f8c3c
MM
673 if (a[RTA_ENCAP_TYPE])
674 {
675 if (rta_get_u16(a[RTA_ENCAP_TYPE]) != LWTUNNEL_ENCAP_MPLS) {
676 log(L_WARN "KRT: Unknown encapsulation method %d in multipath", rta_get_u16(a[RTA_ENCAP_TYPE]));
677 return NULL;
678 }
679
680 struct rtattr *enca[BIRD_RTA_MAX];
681 nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
682 nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
683 rv->labels = rta_get_mpls(enca[RTA_DST], rv->label);
684 break;
685 }
686
95616c82
OZ
687
688 len -= NLMSG_ALIGN(nh->rtnh_len);
689 nh = RTNH_NEXT(nh);
690 }
691
692 return first;
693}
694
9fdf9d29
OZ
695static void
696nl_add_metrics(struct nlmsghdr *h, uint bufsize, u32 *metrics, int max)
697{
698 struct rtattr *a = nl_open_attr(h, bufsize, RTA_METRICS);
699 int t;
700
701 for (t = 1; t < max; t++)
702 if (metrics[0] & (1 << t))
703 nl_add_attr_u32(h, bufsize, t, metrics[t]);
704
705 nl_close_attr(h, a);
706}
707
708static int
709nl_parse_metrics(struct rtattr *hdr, u32 *metrics, int max)
710{
711 struct rtattr *a = RTA_DATA(hdr);
712 int len = RTA_PAYLOAD(hdr);
713
714 metrics[0] = 0;
715 for (; RTA_OK(a, len); a = RTA_NEXT(a, len))
716 {
717 if (a->rta_type == RTA_UNSPEC)
718 continue;
719
720 if (a->rta_type >= max)
721 continue;
722
723 if (RTA_PAYLOAD(a) != 4)
724 return -1;
725
726 metrics[0] |= 1 << a->rta_type;
acb04cfd 727 metrics[a->rta_type] = rta_get_u32(a);
9fdf9d29
OZ
728 }
729
730 if (len > 0)
731 return -1;
732
733 return 0;
734}
735
95616c82
OZ
736
737/*
738 * Scanning of interfaces
739 */
740
741static void
742nl_parse_link(struct nlmsghdr *h, int scan)
743{
744 struct ifinfomsg *i;
ad276157 745 struct rtattr *a[BIRD_IFLA_MAX];
95616c82
OZ
746 int new = h->nlmsg_type == RTM_NEWLINK;
747 struct iface f = {};
748 struct iface *ifi;
749 char *name;
750 u32 mtu;
ae80a2de 751 uint fl;
95616c82 752
ad276157 753 if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFLA_RTA(i), ifla_attr_want, a, sizeof(a)))
95616c82 754 return;
ad276157 755 if (!a[IFLA_IFNAME] || (RTA_PAYLOAD(a[IFLA_IFNAME]) < 2) || !a[IFLA_MTU])
95616c82 756 {
ad276157
MM
757 /*
758 * IFLA_IFNAME and IFLA_MTU are required, in fact, but there may also come
759 * a message with IFLA_WIRELESS set, where (e.g.) no IFLA_IFNAME exists.
760 * We simply ignore all such messages with IFLA_WIRELESS without notice.
761 */
762
763 if (a[IFLA_WIRELESS])
764 return;
765
766 log(L_ERR "KIF: Malformed message received");
95616c82
OZ
767 return;
768 }
ad276157 769
95616c82 770 name = RTA_DATA(a[IFLA_IFNAME]);
acb04cfd 771 mtu = rta_get_u32(a[IFLA_MTU]);
95616c82
OZ
772
773 ifi = if_find_by_index(i->ifi_index);
774 if (!new)
775 {
776 DBG("KIF: IF%d(%s) goes down\n", i->ifi_index, name);
777 if (!ifi)
778 return;
779
780 if_delete(ifi);
781 }
782 else
783 {
784 DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i->ifi_index, name, mtu, i->ifi_flags);
785 if (ifi && strncmp(ifi->name, name, sizeof(ifi->name)-1))
786 if_delete(ifi);
787
788 strncpy(f.name, name, sizeof(f.name)-1);
789 f.index = i->ifi_index;
790 f.mtu = mtu;
791
792 fl = i->ifi_flags;
793 if (fl & IFF_UP)
794 f.flags |= IF_ADMIN_UP;
795 if (fl & IFF_LOWER_UP)
796 f.flags |= IF_LINK_UP;
797 if (fl & IFF_LOOPBACK) /* Loopback */
798 f.flags |= IF_MULTIACCESS | IF_LOOPBACK | IF_IGNORE;
799 else if (fl & IFF_POINTOPOINT) /* PtP */
800 f.flags |= IF_MULTICAST;
801 else if (fl & IFF_BROADCAST) /* Broadcast */
802 f.flags |= IF_MULTIACCESS | IF_BROADCAST | IF_MULTICAST;
803 else
804 f.flags |= IF_MULTIACCESS; /* NBMA */
3216eb03 805
16a3254c
OZ
806 if (fl & IFF_MULTICAST)
807 f.flags |= IF_MULTICAST;
808
3216eb03
OZ
809 ifi = if_update(&f);
810
811 if (!scan)
812 if_end_partial_update(ifi);
95616c82
OZ
813 }
814}
815
816static void
9b136840 817nl_parse_addr4(struct ifaddrmsg *i, int scan, int new)
95616c82 818{
ad276157 819 struct rtattr *a[BIRD_IFA_MAX];
95616c82 820 struct iface *ifi;
e37d2e3e 821 u32 ifa_flags;
95616c82
OZ
822 int scope;
823
9b136840 824 if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a)))
95616c82 825 return;
ad276157 826
9b136840 827 if (!a[IFA_LOCAL])
ad276157 828 {
9b136840
MM
829 log(L_ERR "KIF: Malformed message received (missing IFA_LOCAL)");
830 return;
ad276157 831 }
ad276157 832 if (!a[IFA_ADDRESS])
95616c82 833 {
ad276157 834 log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
95616c82
OZ
835 return;
836 }
837
838 ifi = if_find_by_index(i->ifa_index);
839 if (!ifi)
840 {
841 log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
842 return;
843 }
844
e37d2e3e
OZ
845 if (a[IFA_FLAGS])
846 ifa_flags = rta_get_u32(a[IFA_FLAGS]);
847 else
848 ifa_flags = i->ifa_flags;
849
9b136840 850 struct ifa ifa;
95616c82
OZ
851 bzero(&ifa, sizeof(ifa));
852 ifa.iface = ifi;
cc5b93f7 853 if (ifa_flags & IFA_F_SECONDARY)
95616c82
OZ
854 ifa.flags |= IA_SECONDARY;
855
9b136840
MM
856 ifa.ip = rta_get_ipa(a[IFA_LOCAL]);
857
d7661fbe 858 if (i->ifa_prefixlen > IP4_MAX_PREFIX_LENGTH)
95616c82
OZ
859 {
860 log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
861 new = 0;
862 }
d7661fbe 863 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH)
95616c82 864 {
9b136840
MM
865 ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
866 net_fill_ip4(&ifa.prefix, rta_get_ip4(a[IFA_ADDRESS]), i->ifa_prefixlen);
95616c82
OZ
867
868 /* It is either a host address or a peer address */
9b136840 869 if (ipa_equal(ifa.ip, ifa.brd))
95616c82
OZ
870 ifa.flags |= IA_HOST;
871 else
872 {
873 ifa.flags |= IA_PEER;
9b136840 874 ifa.opposite = ifa.brd;
95616c82
OZ
875 }
876 }
877 else
878 {
9b136840
MM
879 net_fill_ip4(&ifa.prefix, ipa_to_ip4(ifa.ip), i->ifa_prefixlen);
880 net_normalize(&ifa.prefix);
881
d7661fbe 882 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 1)
95616c82
OZ
883 ifa.opposite = ipa_opposite_m1(ifa.ip);
884
d7661fbe 885 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 2)
95616c82
OZ
886 ifa.opposite = ipa_opposite_m2(ifa.ip);
887
888 if ((ifi->flags & IF_BROADCAST) && a[IFA_BROADCAST])
889 {
9b136840
MM
890 ip4_addr xbrd = rta_get_ip4(a[IFA_BROADCAST]);
891 ip4_addr ybrd = ip4_or(ipa_to_ip4(ifa.ip), ip4_not(ip4_mkmask(i->ifa_prefixlen)));
892
893 if (ip4_equal(xbrd, net4_prefix(&ifa.prefix)) || ip4_equal(xbrd, ybrd))
894 ifa.brd = ipa_from_ip4(xbrd);
95616c82 895 else if (ifi->flags & IF_TMP_DOWN) /* Complain only during the first scan */
9b136840 896 {
e691d16a 897 log(L_ERR "KIF: Invalid broadcast address %I4 for %s", xbrd, ifi->name);
9b136840
MM
898 ifa.brd = ipa_from_ip4(ybrd);
899 }
900 }
901 }
902
903 scope = ipa_classify(ifa.ip);
904 if (scope < 0)
905 {
906 log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
907 return;
908 }
909 ifa.scope = scope & IADDR_SCOPE_MASK;
910
911 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
912 ifi->index, ifi->name,
913 new ? "added" : "removed",
914 ifa.ip, ifa.flags, ifa.prefix, ifa.brd, ifa.opposite);
915
916 if (new)
917 ifa_update(&ifa);
918 else
919 ifa_delete(&ifa);
920
921 if (!scan)
922 if_end_partial_update(ifi);
923}
924
925static void
926nl_parse_addr6(struct ifaddrmsg *i, int scan, int new)
927{
928 struct rtattr *a[BIRD_IFA_MAX];
929 struct iface *ifi;
cc5b93f7 930 u32 ifa_flags;
9b136840
MM
931 int scope;
932
933 if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a)))
934 return;
935
936 if (!a[IFA_ADDRESS])
937 {
938 log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
939 return;
940 }
941
942 ifi = if_find_by_index(i->ifa_index);
943 if (!ifi)
944 {
945 log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
946 return;
947 }
948
cc5b93f7
OZ
949 if (a[IFA_FLAGS])
950 ifa_flags = rta_get_u32(a[IFA_FLAGS]);
951 else
952 ifa_flags = i->ifa_flags;
953
9b136840
MM
954 struct ifa ifa;
955 bzero(&ifa, sizeof(ifa));
956 ifa.iface = ifi;
e37d2e3e 957 if (ifa_flags & IFA_F_SECONDARY)
9b136840
MM
958 ifa.flags |= IA_SECONDARY;
959
e37d2e3e
OZ
960 /* Ignore tentative addresses silently */
961 if (ifa_flags & IFA_F_TENTATIVE)
962 return;
9b136840 963
95616c82 964 /* IFA_LOCAL can be unset for IPv6 interfaces */
9b136840
MM
965 ifa.ip = rta_get_ipa(a[IFA_LOCAL] ? : a[IFA_ADDRESS]);
966
d7661fbe 967 if (i->ifa_prefixlen > IP6_MAX_PREFIX_LENGTH)
9b136840
MM
968 {
969 log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
970 new = 0;
971 }
d7661fbe 972 if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH)
9b136840
MM
973 {
974 ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
975 net_fill_ip6(&ifa.prefix, rta_get_ip6(a[IFA_ADDRESS]), i->ifa_prefixlen);
976
977 /* It is either a host address or a peer address */
978 if (ipa_equal(ifa.ip, ifa.brd))
979 ifa.flags |= IA_HOST;
980 else
981 {
982 ifa.flags |= IA_PEER;
983 ifa.opposite = ifa.brd;
95616c82 984 }
9b136840
MM
985 }
986 else
987 {
988 net_fill_ip6(&ifa.prefix, ipa_to_ip6(ifa.ip), i->ifa_prefixlen);
989 net_normalize(&ifa.prefix);
990
d7661fbe 991 if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH - 1)
9b136840 992 ifa.opposite = ipa_opposite_m1(ifa.ip);
95616c82
OZ
993 }
994
995 scope = ipa_classify(ifa.ip);
996 if (scope < 0)
997 {
998 log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
999 return;
1000 }
1001 ifa.scope = scope & IADDR_SCOPE_MASK;
1002
9b136840 1003 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
95616c82
OZ
1004 ifi->index, ifi->name,
1005 new ? "added" : "removed",
9b136840 1006 ifa.ip, ifa.flags, ifa.prefix, ifa.brd, ifa.opposite);
3216eb03 1007
95616c82
OZ
1008 if (new)
1009 ifa_update(&ifa);
1010 else
1011 ifa_delete(&ifa);
3216eb03
OZ
1012
1013 if (!scan)
1014 if_end_partial_update(ifi);
95616c82
OZ
1015}
1016
9b136840
MM
/*
 * Process an RTM_NEWADDR or RTM_DELADDR message @h by handing it to the
 * parser of its address family. Other families are ignored.
 */
static void
nl_parse_addr(struct nlmsghdr *h, int scan)
{
  struct ifaddrmsg *i = nl_checkin(h, sizeof(*i));
  if (!i)
    return;

  int new = (h->nlmsg_type == RTM_NEWADDR);

  if (i->ifa_family == AF_INET)
    nl_parse_addr4(i, scan, new);
  else if (i->ifa_family == AF_INET6)
    nl_parse_addr6(i, scan, new);
}
1036
95616c82
OZ
1037void
1038kif_do_scan(struct kif_proto *p UNUSED)
1039{
1040 struct nlmsghdr *h;
1041
1042 if_start_update();
1043
86c3eea0 1044 nl_request_dump(AF_UNSPEC, RTM_GETLINK);
95616c82
OZ
1045 while (h = nl_get_scan())
1046 if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)
1047 nl_parse_link(h, 1);
1048 else
1049 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
29a64162 1050
d7661fbe 1051 nl_request_dump(AF_INET, RTM_GETADDR);
95616c82
OZ
1052 while (h = nl_get_scan())
1053 if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
3216eb03 1054 nl_parse_addr(h, 1);
95616c82
OZ
1055 else
1056 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
29a64162 1057
d7661fbe
MM
1058 nl_request_dump(AF_INET6, RTM_GETADDR);
1059 while (h = nl_get_scan())
1060 if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
1061 nl_parse_addr(h, 1);
1062 else
1063 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
29a64162 1064
95616c82
OZ
1065 if_end_update();
1066}
1067
1068/*
1069 * Routes
1070 */
1071
9ddbfbdd
MM
1072static inline u32
1073krt_table_id(struct krt_proto *p)
1074{
1075 return KRT_CF->sys.table_id;
1076}
1077
1078static HASH(struct krt_proto) nl_table_map;
1079
29a64162
OZ
1080#define RTH_KEY(p) p->af, krt_table_id(p)
1081#define RTH_NEXT(p) p->sys.hash_next
1082#define RTH_EQ(a1,i1,a2,i2) a1 == a2 && i1 == i2
1083#define RTH_FN(a,i) a ^ u32_hash(i)
9ddbfbdd
MM
1084
1085#define RTH_REHASH rth_rehash
1086#define RTH_PARAMS /8, *2, 2, 2, 6, 20
1087
1088HASH_DEFINE_REHASH_FN(RTH, struct krt_proto)
95616c82
OZ
1089
1090int
1091krt_capable(rte *e)
1092{
1093 rta *a = e->attrs;
1094
95616c82 1095 switch (a->dest)
62e64905 1096 {
4e276a89 1097 case RTD_UNICAST:
95616c82
OZ
1098 case RTD_BLACKHOLE:
1099 case RTD_UNREACHABLE:
1100 case RTD_PROHIBIT:
62e64905
OZ
1101 return 1;
1102
95616c82
OZ
1103 default:
1104 return 0;
62e64905 1105 }
95616c82
OZ
1106}
1107
1108static inline int
4e276a89 1109nh_bufsize(struct nexthop *nh)
95616c82
OZ
1110{
1111 int rv = 0;
1112 for (; nh != NULL; nh = nh->next)
9fdf9d29 1113 rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr)));
95616c82
OZ
1114 return rv;
1115}
1116
1117static int
4e276a89 1118nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int dest, struct nexthop *nh)
95616c82
OZ
1119{
1120 eattr *ea;
1121 net *net = e->net;
1122 rta *a = e->attrs;
4e276a89 1123 int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(&(a->nh));
4adcb9df 1124 u32 priority = 0;
a8caff32 1125
95616c82
OZ
1126 struct {
1127 struct nlmsghdr h;
1128 struct rtmsg r;
a8caff32
MM
1129 char buf[0];
1130 } *r;
1131
1132 int rsize = sizeof(*r) + bufsize;
1133 r = alloca(rsize);
95616c82 1134
cc5b93f7 1135 DBG("nl_send_route(%N,op=%x)\n", net->n.addr, op);
95616c82 1136
a8caff32
MM
1137 bzero(&r->h, sizeof(r->h));
1138 bzero(&r->r, sizeof(r->r));
cc5b93f7 1139 r->h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE;
a8caff32 1140 r->h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
cc5b93f7 1141 r->h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK;
95616c82 1142
a8caff32
MM
1143 r->r.rtm_family = p->af;
1144 r->r.rtm_dst_len = net_pxlen(net->n.addr);
1145 r->r.rtm_protocol = RTPROT_BIRD;
7074be22 1146 r->r.rtm_scope = RT_SCOPE_NOWHERE;
d14f8c3c
MM
1147 if (p->af == AF_MPLS)
1148 {
1149 u32 label = net_mpls(net->n.addr);
1150 nl_add_attr_mpls(&r->h, rsize, RTA_DST, 1, &label);
1151 }
1152 else
1153 nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(net->n.addr));
95616c82 1154
2feaa693
OZ
1155 /*
1156 * Strange behavior for RTM_DELROUTE:
1157 * 1) rtm_family is ignored in IPv6, works for IPv4
1158 * 2) not setting RTA_PRIORITY is different from setting default value (on IPv6)
1159 * 3) not setting RTA_PRIORITY is equivalent to setting 0, which is wildcard
1160 */
1161
9ddbfbdd 1162 if (krt_table_id(p) < 256)
a8caff32 1163 r->r.rtm_table = krt_table_id(p);
9ddbfbdd 1164 else
a8caff32 1165 nl_add_attr_u32(&r->h, rsize, RTA_TABLE, krt_table_id(p));
9ddbfbdd 1166
4adcb9df
OZ
1167 if (a->source == RTS_DUMMY)
1168 priority = e->u.krt.metric;
1169 else if (KRT_CF->sys.metric)
1170 priority = KRT_CF->sys.metric;
1171 else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC)))
1172 priority = ea->u.data;
78a2cc28 1173
4adcb9df 1174 if (priority)
d1b8fe93 1175 nl_add_attr_u32(&r->h, rsize, RTA_PRIORITY, priority);
78a2cc28 1176
2feaa693
OZ
1177 /* For route delete, we do not specify remaining route attributes */
1178 if (op == NL_OP_DELETE)
1179 goto dest;
78a2cc28 1180
6e75d0d2
OZ
1181 /* Default scope is LINK for device routes, UNIVERSE otherwise */
1182 if (ea = ea_find(eattrs, EA_KRT_SCOPE))
cc5b93f7 1183 r->r.rtm_scope = ea->u.data;
6e75d0d2 1184 else
4e276a89 1185 r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
95616c82
OZ
1186
1187 if (ea = ea_find(eattrs, EA_KRT_PREFSRC))
a8caff32 1188 nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data);
95616c82
OZ
1189
1190 if (ea = ea_find(eattrs, EA_KRT_REALM))
a8caff32 1191 nl_add_attr_u32(&r->h, rsize, RTA_FLOW, ea->u.data);
95616c82 1192
9fdf9d29
OZ
1193
1194 u32 metrics[KRT_METRICS_MAX];
1195 metrics[0] = 0;
1196
1197 struct ea_walk_state ews = { .eattrs = eattrs };
1198 while (ea = ea_walk(&ews, EA_KRT_METRICS, KRT_METRICS_MAX))
1199 {
1200 int id = ea->id - EA_KRT_METRICS;
1201 metrics[0] |= 1 << id;
1202 metrics[id] = ea->u.data;
1203 }
1204
1205 if (metrics[0])
a8caff32 1206 nl_add_metrics(&r->h, rsize, metrics, KRT_METRICS_MAX);
9fdf9d29
OZ
1207
1208
2feaa693 1209dest:
2feaa693 1210 switch (dest)
95616c82 1211 {
4e276a89 1212 case RTD_UNICAST:
a8caff32 1213 r->r.rtm_type = RTN_UNICAST;
4e276a89 1214 if (nh->next && !krt_ecmp6(p))
d14f8c3c 1215 nl_add_multipath(&r->h, rsize, nh, p->af);
4e276a89
MM
1216 else
1217 {
1218 nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->iface->index);
d14f8c3c 1219 nl_add_nexthop(&r->h, rsize, nh, p->af);
4e276a89 1220 }
95616c82
OZ
1221 break;
1222 case RTD_BLACKHOLE:
a8caff32 1223 r->r.rtm_type = RTN_BLACKHOLE;
95616c82
OZ
1224 break;
1225 case RTD_UNREACHABLE:
a8caff32 1226 r->r.rtm_type = RTN_UNREACHABLE;
95616c82
OZ
1227 break;
1228 case RTD_PROHIBIT:
a8caff32 1229 r->r.rtm_type = RTN_PROHIBIT;
95616c82 1230 break;
2feaa693
OZ
1231 case RTD_NONE:
1232 break;
95616c82
OZ
1233 default:
1234 bug("krt_capable inconsistent with nl_send_route");
1235 }
1236
2feaa693 1237 /* Ignore missing for DELETE */
cc5b93f7 1238 return nl_exchange(&r->h, (op == NL_OP_DELETE));
2feaa693
OZ
1239}
1240
1241static inline int
1242nl_add_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs)
1243{
1244 rta *a = e->attrs;
1245 int err = 0;
1246
4e276a89 1247 if (krt_ecmp6(p) && a->nh.next)
2feaa693 1248 {
4e276a89 1249 struct nexthop *nh = &(a->nh);
2feaa693 1250
4e276a89 1251 err = nl_send_route(p, e, eattrs, NL_OP_ADD, RTD_UNICAST, nh);
2feaa693
OZ
1252 if (err < 0)
1253 return err;
1254
1255 for (nh = nh->next; nh; nh = nh->next)
4e276a89 1256 err += nl_send_route(p, e, eattrs, NL_OP_APPEND, RTD_UNICAST, nh);
2feaa693
OZ
1257
1258 return err;
1259 }
1260
4e276a89 1261 return nl_send_route(p, e, eattrs, NL_OP_ADD, a->dest, &(a->nh));
2feaa693
OZ
1262}
1263
1264static inline int
1265nl_delete_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs)
1266{
1267 int err = 0;
1268
1269 /* For IPv6, we just repeatedly request DELETE until we get error */
1270 do
4e276a89 1271 err = nl_send_route(p, e, eattrs, NL_OP_DELETE, RTD_NONE, NULL);
2feaa693
OZ
1272 while (krt_ecmp6(p) && !err);
1273
1274 return err;
95616c82
OZ
1275}
1276
1277void
1278krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old, struct ea_list *eattrs)
1279{
1280 int err = 0;
1281
1282 /*
2feaa693
OZ
1283 * We could use NL_OP_REPLACE, but route replace on Linux has some problems:
1284 *
1285 * 1) Does not check for matching rtm_protocol
1286 * 2) Has broken semantics for IPv6 ECMP
1287 * 3) Crashes some kernel version when used for IPv6 ECMP
1288 *
1289 * So we use NL_OP_DELETE and then NL_OP_ADD. We also do not trust the old
1290 * route value, so we do not try to optimize IPv6 ECMP reconfigurations.
95616c82
OZ
1291 */
1292
1293 if (old)
2feaa693 1294 nl_delete_rte(p, old, eattrs);
95616c82
OZ
1295
1296 if (new)
2feaa693 1297 err = nl_add_rte(p, new, eattrs);
95616c82
OZ
1298
1299 if (err < 0)
1300 n->n.flags |= KRF_SYNC_ERROR;
1301 else
1302 n->n.flags &= ~KRF_SYNC_ERROR;
1303}
1304
1305
4e276a89
MM
1306static inline struct nexthop *
1307nl_alloc_nexthop(struct nl_parse_state *s, ip_addr gw, struct iface *iface, byte weight)
2feaa693 1308{
4e276a89 1309 struct nexthop *nh = lp_alloc(s->pool, sizeof(struct nexthop));
2feaa693
OZ
1310
1311 nh->gw = gw;
1312 nh->iface = iface;
1313 nh->next = NULL;
1314 nh->weight = weight;
1315
1316 return nh;
1317}
1318
1319static int
1320nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint priority, uint krt_type)
1321{
1322 /* Route merging must be active */
1323 if (!s->merge)
1324 return 0;
1325
1326 /* Saved and new route must have same network, proto/table, and priority */
1327 if ((s->net != net) || (s->proto != p) || (s->krt_metric != priority))
1328 return 0;
1329
1330 /* Both must be regular unicast routes */
1331 if ((s->krt_type != RTN_UNICAST) || (krt_type != RTN_UNICAST))
1332 return 0;
1333
1334 return 1;
1335}
1336
1337static void
1338nl_announce_route(struct nl_parse_state *s)
1339{
1340 rte *e = rte_get_temp(s->attrs);
1341 e->net = s->net;
1342 e->u.krt.src = s->krt_src;
1343 e->u.krt.proto = s->krt_proto;
1344 e->u.krt.seen = 0;
1345 e->u.krt.best = 0;
1346 e->u.krt.metric = s->krt_metric;
1347
1348 if (s->scan)
1349 krt_got_route(s->proto, e);
1350 else
1351 krt_got_route_async(s->proto, e, s->new);
1352
1353 s->net = NULL;
1354 s->attrs = NULL;
1355 s->proto = NULL;
1356 lp_flush(s->pool);
1357}
1358
1359static inline void
1360nl_parse_begin(struct nl_parse_state *s, int scan, int merge)
1361{
1362 memset(s, 0, sizeof (struct nl_parse_state));
1363 s->pool = nl_linpool;
1364 s->scan = scan;
1365 s->merge = merge;
1366}
1367
1368static inline void
1369nl_parse_end(struct nl_parse_state *s)
1370{
1371 if (s->net)
1372 nl_announce_route(s);
1373}
1374
1375
95616c82
OZ
1376#define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
1377
1378static void
2feaa693 1379nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
95616c82
OZ
1380{
1381 struct krt_proto *p;
1382 struct rtmsg *i;
ad276157 1383 struct rtattr *a[BIRD_RTA_MAX];
95616c82
OZ
1384 int new = h->nlmsg_type == RTM_NEWROUTE;
1385
29a64162 1386 net_addr dst;
95616c82 1387 u32 oif = ~0;
29a64162 1388 u32 table_id;
2feaa693 1389 u32 priority = 0;
6e75d0d2 1390 u32 def_scope = RT_SCOPE_UNIVERSE;
95616c82
OZ
1391 int src;
1392
ad276157 1393 if (!(i = nl_checkin(h, sizeof(*i))))
95616c82 1394 return;
ad276157
MM
1395
1396 switch (i->rtm_family)
95616c82 1397 {
29a64162
OZ
1398 case AF_INET:
1399 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want4, a, sizeof(a)))
1400 return;
1401
1402 if (a[RTA_DST])
1403 net_fill_ip4(&dst, rta_get_ip4(a[RTA_DST]), i->rtm_dst_len);
1404 else
1405 net_fill_ip4(&dst, IP4_NONE, 0);
1406 break;
1407
cc5b93f7
OZ
1408 case AF_INET6:
1409 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a)))
1410 return;
29a64162
OZ
1411
1412 if (a[RTA_DST])
1413 net_fill_ip6(&dst, rta_get_ip6(a[RTA_DST]), i->rtm_dst_len);
1414 else
1415 net_fill_ip6(&dst, IP6_NONE, 0);
1416 break;
1417
d14f8c3c
MM
1418 case AF_MPLS:
1419 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want_mpls, a, sizeof(a)))
1420 return;
1421
ed610044
OZ
1422 if (!a[RTA_DST])
1423 SKIP("MPLS route without RTA_DST");
1424
1425 if (rta_get_mpls(a[RTA_DST], rta_mpls_stack) != 1)
1426 SKIP("MPLS route with multi-label RTA_DST");
1427
1428 net_fill_mpls(&dst, rta_mpls_stack[0]);
d14f8c3c
MM
1429 break;
1430
29a64162
OZ
1431 default:
1432 return;
95616c82
OZ
1433 }
1434
95616c82 1435 if (a[RTA_OIF])
acb04cfd 1436 oif = rta_get_u32(a[RTA_OIF]);
95616c82 1437
9ddbfbdd 1438 if (a[RTA_TABLE])
29a64162 1439 table_id = rta_get_u32(a[RTA_TABLE]);
9ddbfbdd 1440 else
29a64162 1441 table_id = i->rtm_table;
9ddbfbdd 1442
29a64162
OZ
1443 /* Do we know this table? */
1444 p = HASH_FIND(nl_table_map, RTH, i->rtm_family, table_id);
95616c82 1445 if (!p)
9ddbfbdd 1446 SKIP("unknown table %d\n", table);
95616c82 1447
95616c82
OZ
1448 if (a[RTA_IIF])
1449 SKIP("IIF set\n");
29a64162 1450
95616c82
OZ
1451 if (i->rtm_tos != 0) /* We don't support TOS */
1452 SKIP("TOS %02x\n", i->rtm_tos);
95616c82 1453
2feaa693 1454 if (s->scan && !new)
95616c82
OZ
1455 SKIP("RTM_DELROUTE in scan\n");
1456
2feaa693
OZ
1457 if (a[RTA_PRIORITY])
1458 priority = rta_get_u32(a[RTA_PRIORITY]);
1459
9b136840 1460 int c = net_classify(&dst);
95616c82
OZ
1461 if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
1462 SKIP("strange class/scope\n");
1463
95616c82
OZ
1464 switch (i->rtm_protocol)
1465 {
1466 case RTPROT_UNSPEC:
1467 SKIP("proto unspec\n");
1468
1469 case RTPROT_REDIRECT:
1470 src = KRT_SRC_REDIRECT;
1471 break;
1472
1473 case RTPROT_KERNEL:
1474 src = KRT_SRC_KERNEL;
1475 return;
1476
1477 case RTPROT_BIRD:
2feaa693 1478 if (!s->scan)
95616c82
OZ
1479 SKIP("echo\n");
1480 src = KRT_SRC_BIRD;
1481 break;
1482
1483 case RTPROT_BOOT:
1484 default:
1485 src = KRT_SRC_ALIEN;
1486 }
1487
f4a60a9b 1488 net *net = net_get(p->p.main_channel->table, &dst);
95616c82 1489
2feaa693
OZ
1490 if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type))
1491 nl_announce_route(s);
1492
d14f8c3c 1493 rta *ra = lp_allocz(s->pool, RTA_MAX_SIZE);
2feaa693
OZ
1494 ra->src = p->p.main_source;
1495 ra->source = RTS_INHERIT;
1496 ra->scope = SCOPE_UNIVERSE;
95616c82
OZ
1497
1498 switch (i->rtm_type)
1499 {
1500 case RTN_UNICAST:
62e64905 1501 ra->dest = RTD_UNICAST;
95616c82 1502
ad276157 1503 if (a[RTA_MULTIPATH] && (i->rtm_family == AF_INET))
95616c82 1504 {
4e276a89
MM
1505 struct nexthop *nh = nl_parse_multipath(p, a[RTA_MULTIPATH]);
1506 if (!nh)
95616c82 1507 {
fe9f1a6d 1508 log(L_ERR "KRT: Received strange multipath route %N", net->n.addr);
95616c82
OZ
1509 return;
1510 }
9fdf9d29 1511
62e64905 1512 ra->nh = *nh;
95616c82
OZ
1513 break;
1514 }
1515
4e276a89
MM
1516 ra->nh.iface = if_find_by_index(oif);
1517 if (!ra->nh.iface)
95616c82 1518 {
fe9f1a6d 1519 log(L_ERR "KRT: Received route %N with unknown ifindex %u", net->n.addr, oif);
95616c82
OZ
1520 return;
1521 }
1522
d14f8c3c 1523 if ((i->rtm_family != AF_MPLS) && a[RTA_GATEWAY] || (i->rtm_family == AF_MPLS) && a[RTA_VIA])
95616c82 1524 {
d14f8c3c
MM
1525 if (i->rtm_family == AF_MPLS)
1526 ra->nh.gw = rta_get_via(a[RTA_VIA]);
1527 else
1528 ra->nh.gw = rta_get_ipa(a[RTA_GATEWAY]);
95616c82
OZ
1529
1530 /* Silently skip strange 6to4 routes */
0bf95f99 1531 const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96);
4e276a89 1532 if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->nh.gw, (net_addr *) &sit))
95616c82
OZ
1533 return;
1534
23c212e7 1535 neighbor *nbr;
d14f8c3c 1536 nbr = neigh_find2(&p->p, &(ra->nh.gw), ra->nh.iface,
23c212e7
OZ
1537 (i->rtm_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0);
1538 if (!nbr || (nbr->scope == SCOPE_HOST))
95616c82 1539 {
4e276a89
MM
1540 log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr,
1541 ra->nh.gw);
95616c82
OZ
1542 return;
1543 }
1544 }
95616c82
OZ
1545
1546 break;
1547 case RTN_BLACKHOLE:
2feaa693 1548 ra->dest = RTD_BLACKHOLE;
95616c82
OZ
1549 break;
1550 case RTN_UNREACHABLE:
2feaa693 1551 ra->dest = RTD_UNREACHABLE;
95616c82
OZ
1552 break;
1553 case RTN_PROHIBIT:
2feaa693 1554 ra->dest = RTD_PROHIBIT;
95616c82
OZ
1555 break;
1556 /* FIXME: What about RTN_THROW? */
1557 default:
1558 SKIP("type %d\n", i->rtm_type);
1559 return;
1560 }
1561
d14f8c3c
MM
1562 int labels = 0;
1563 if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !ra->nh.next)
1564 labels = rta_get_mpls(a[RTA_NEWDST], ra->nh.label);
1565
1566 if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !ra->nh.next)
1567 {
1568 switch (rta_get_u16(a[RTA_ENCAP_TYPE]))
1569 {
1570 case LWTUNNEL_ENCAP_MPLS:
1571 {
1572 struct rtattr *enca[BIRD_RTA_MAX];
1573 nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
1574 nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
1575 labels = rta_get_mpls(enca[RTA_DST], ra->nh.label);
1576 break;
1577 }
1578 default:
1579 SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE]));
1580 break;
1581 }
1582 }
1583
1584 if (labels < 0)
1585 {
1586 log(L_WARN "KRT: Too long MPLS stack received, ignoring.");
1587 ra->nh.labels = 0;
1588 }
1589 else
1590 ra->nh.labels = labels;
1591
1592 rte *e = rte_get_temp(ra);
1593 e->net = net;
1594 e->u.krt.src = src;
1595 e->u.krt.proto = i->rtm_protocol;
1596 e->u.krt.seen = 0;
1597 e->u.krt.best = 0;
1598 e->u.krt.metric = 0;
1599
6e75d0d2
OZ
1600 if (i->rtm_scope != def_scope)
1601 {
1602 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1603 ea->next = ra->eattrs;
1604 ra->eattrs = ea;
1605 ea->flags = EALF_SORTED;
1606 ea->count = 1;
1607 ea->attrs[0].id = EA_KRT_SCOPE;
1608 ea->attrs[0].flags = 0;
1609 ea->attrs[0].type = EAF_TYPE_INT;
1610 ea->attrs[0].u.data = i->rtm_scope;
1611 }
95616c82 1612
d14f8c3c
MM
1613 if (a[RTA_PRIORITY])
1614 e->u.krt.metric = rta_get_u32(a[RTA_PRIORITY]);
1615
95616c82
OZ
1616 if (a[RTA_PREFSRC])
1617 {
9b136840 1618 ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]);
95616c82 1619
2feaa693
OZ
1620 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1621 ea->next = ra->eattrs;
1622 ra->eattrs = ea;
95616c82
OZ
1623 ea->flags = EALF_SORTED;
1624 ea->count = 1;
1625 ea->attrs[0].id = EA_KRT_PREFSRC;
1626 ea->attrs[0].flags = 0;
1627 ea->attrs[0].type = EAF_TYPE_IP_ADDRESS;
2feaa693 1628 ea->attrs[0].u.ptr = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps));
95616c82
OZ
1629 ea->attrs[0].u.ptr->length = sizeof(ps);
1630 memcpy(ea->attrs[0].u.ptr->data, &ps, sizeof(ps));
1631 }
1632
1633 if (a[RTA_FLOW])
1634 {
2feaa693
OZ
1635 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1636 ea->next = ra->eattrs;
1637 ra->eattrs = ea;
95616c82
OZ
1638 ea->flags = EALF_SORTED;
1639 ea->count = 1;
1640 ea->attrs[0].id = EA_KRT_REALM;
1641 ea->attrs[0].flags = 0;
1642 ea->attrs[0].type = EAF_TYPE_INT;
acb04cfd 1643 ea->attrs[0].u.data = rta_get_u32(a[RTA_FLOW]);
95616c82
OZ
1644 }
1645
9fdf9d29
OZ
1646 if (a[RTA_METRICS])
1647 {
1648 u32 metrics[KRT_METRICS_MAX];
2feaa693 1649 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr));
9fdf9d29
OZ
1650 int t, n = 0;
1651
1652 if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0)
1653 {
fe9f1a6d 1654 log(L_ERR "KRT: Received route %N with strange RTA_METRICS attribute", net->n.addr);
9fdf9d29
OZ
1655 return;
1656 }
1657
1658 for (t = 1; t < KRT_METRICS_MAX; t++)
1659 if (metrics[0] & (1 << t))
1660 {
1661 ea->attrs[n].id = EA_CODE(EAP_KRT, KRT_METRICS_OFFSET + t);
1662 ea->attrs[n].flags = 0;
1663 ea->attrs[n].type = EAF_TYPE_INT; /* FIXME: Some are EAF_TYPE_BITFIELD */
1664 ea->attrs[n].u.data = metrics[t];
1665 n++;
1666 }
1667
1668 if (n > 0)
1669 {
2feaa693 1670 ea->next = ra->eattrs;
9fdf9d29
OZ
1671 ea->flags = EALF_SORTED;
1672 ea->count = n;
2feaa693 1673 ra->eattrs = ea;
9fdf9d29
OZ
1674 }
1675 }
1676
2feaa693
OZ
1677 /*
1678 * Ideally, now we would send the received route to the rest of kernel code.
1679 * But IPv6 ECMP routes are sent as a sequence of routes, so we postpone it
1680 * and merge next hops until the end of the sequence.
1681 */
1682
1683 if (!s->net)
1684 {
1685 /* Store the new route */
1686 s->net = net;
1687 s->attrs = ra;
1688 s->proto = p;
1689 s->new = new;
1690 s->krt_src = src;
1691 s->krt_type = i->rtm_type;
1692 s->krt_proto = i->rtm_protocol;
1693 s->krt_metric = priority;
1694 }
95616c82 1695 else
2feaa693
OZ
1696 {
1697 /* Merge next hops with the stored route */
62e64905 1698 rta *oa = s->attrs;
2feaa693 1699
62e64905
OZ
1700 struct nexthop *nhs = &oa->nh;
1701 nexthop_insert(&nhs, &ra->nh);
1702
1703 /* Perhaps new nexthop is inserted at the first position */
1704 if (nhs == &ra->nh)
1705 {
1706 /* Swap rtas */
1707 s->attrs = ra;
1708
1709 /* Keep old eattrs */
1710 ra->eattrs = oa->eattrs;
1711 }
2feaa693 1712 }
95616c82
OZ
1713}
1714
1715void
1716krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
1717{
1718 struct nlmsghdr *h;
2feaa693 1719 struct nl_parse_state s;
95616c82 1720
cc5b93f7 1721 nl_parse_begin(&s, 1, 0);
d7661fbe 1722 nl_request_dump(AF_INET, RTM_GETROUTE);
95616c82
OZ
1723 while (h = nl_get_scan())
1724 if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
2feaa693 1725 nl_parse_route(&s, h);
95616c82
OZ
1726 else
1727 log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
cc5b93f7 1728 nl_parse_end(&s);
29a64162 1729
cc5b93f7 1730 nl_parse_begin(&s, 1, 1);
d7661fbe
MM
1731 nl_request_dump(AF_INET6, RTM_GETROUTE);
1732 while (h = nl_get_scan())
1733 if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
cc5b93f7 1734 nl_parse_route(&s, h);
d7661fbe
MM
1735 else
1736 log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
2feaa693 1737 nl_parse_end(&s);
d14f8c3c
MM
1738
1739 nl_parse_begin(&s, 1, 1);
1740 nl_request_dump(AF_MPLS, RTM_GETROUTE);
1741 while (h = nl_get_scan())
1742 if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
1743 nl_parse_route(&s, h);
1744 else
1745 log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
1746 nl_parse_end(&s);
95616c82
OZ
1747}
1748
1749/*
1750 * Asynchronous Netlink interface
1751 */
1752
1753static sock *nl_async_sk; /* BIRD socket for asynchronous notifications */
1754static byte *nl_async_rx_buffer; /* Receive buffer */
1755
1756static void
1757nl_async_msg(struct nlmsghdr *h)
1758{
2feaa693
OZ
1759 struct nl_parse_state s;
1760
95616c82
OZ
1761 switch (h->nlmsg_type)
1762 {
1763 case RTM_NEWROUTE:
1764 case RTM_DELROUTE:
1765 DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);
2feaa693
OZ
1766 nl_parse_begin(&s, 0, 0);
1767 nl_parse_route(&s, h);
1768 nl_parse_end(&s);
95616c82
OZ
1769 break;
1770 case RTM_NEWLINK:
1771 case RTM_DELLINK:
1772 DBG("KRT: Received async link notification (%d)\n", h->nlmsg_type);
1e4891e4
OZ
1773 if (kif_proto)
1774 nl_parse_link(h, 0);
95616c82
OZ
1775 break;
1776 case RTM_NEWADDR:
1777 case RTM_DELADDR:
1778 DBG("KRT: Received async address notification (%d)\n", h->nlmsg_type);
1e4891e4
OZ
1779 if (kif_proto)
1780 nl_parse_addr(h, 0);
95616c82
OZ
1781 break;
1782 default:
1783 DBG("KRT: Received unknown async notification (%d)\n", h->nlmsg_type);
1784 }
1785}
1786
1787static int
3e236955 1788nl_async_hook(sock *sk, uint size UNUSED)
95616c82
OZ
1789{
1790 struct iovec iov = { nl_async_rx_buffer, NL_RX_SIZE };
1791 struct sockaddr_nl sa;
31e9e101
ST
1792 struct msghdr m = {
1793 .msg_name = &sa,
1794 .msg_namelen = sizeof(sa),
1795 .msg_iov = &iov,
1796 .msg_iovlen = 1,
1797 };
95616c82
OZ
1798 struct nlmsghdr *h;
1799 int x;
ae80a2de 1800 uint len;
95616c82
OZ
1801
1802 x = recvmsg(sk->fd, &m, 0);
1803 if (x < 0)
1804 {
1805 if (errno == ENOBUFS)
1806 {
1807 /*
1808 * Netlink reports some packets have been thrown away.
1809 * One day we might react to it by asking for route table
1810 * scan in near future.
1811 */
2c33da50 1812 log(L_WARN "Kernel dropped some netlink messages, will resync on next scan.");
95616c82
OZ
1813 return 1; /* More data are likely to be ready */
1814 }
1815 else if (errno != EWOULDBLOCK)
1816 log(L_ERR "Netlink recvmsg: %m");
1817 return 0;
1818 }
1819 if (sa.nl_pid) /* It isn't from the kernel */
1820 {
1821 DBG("Non-kernel packet\n");
1822 return 1;
1823 }
1824 h = (void *) nl_async_rx_buffer;
1825 len = x;
1826 if (m.msg_flags & MSG_TRUNC)
1827 {
1828 log(L_WARN "Netlink got truncated asynchronous message");
1829 return 1;
1830 }
1831 while (NLMSG_OK(h, len))
1832 {
1833 nl_async_msg(h);
1834 h = NLMSG_NEXT(h, len);
1835 }
1836 if (len)
1837 log(L_WARN "nl_async_hook: Found packet remnant of size %d", len);
1838 return 1;
1839}
1840
ccd2a3ed
MM
1841static void
1842nl_async_err_hook(sock *sk, int e UNUSED)
1843{
1844 nl_async_hook(sk, 0);
1845}
1846
95616c82
OZ
1847static void
1848nl_open_async(void)
1849{
1850 sock *sk;
1851 struct sockaddr_nl sa;
1852 int fd;
95616c82 1853
f83ce94d 1854 if (nl_async_sk)
95616c82 1855 return;
95616c82
OZ
1856
1857 DBG("KRT: Opening async netlink socket\n");
1858
1859 fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
1860 if (fd < 0)
1861 {
1862 log(L_ERR "Unable to open asynchronous rtnetlink socket: %m");
1863 return;
1864 }
1865
1866 bzero(&sa, sizeof(sa));
1867 sa.nl_family = AF_NETLINK;
29a64162
OZ
1868 sa.nl_groups = RTMGRP_LINK |
1869 RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE |
1870 RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE;
1871
95616c82
OZ
1872 if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
1873 {
1874 log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m");
f83ce94d 1875 close(fd);
95616c82
OZ
1876 return;
1877 }
1878
f83ce94d
OZ
1879 nl_async_rx_buffer = xmalloc(NL_RX_SIZE);
1880
95616c82
OZ
1881 sk = nl_async_sk = sk_new(krt_pool);
1882 sk->type = SK_MAGIC;
1883 sk->rx_hook = nl_async_hook;
ccd2a3ed 1884 sk->err_hook = nl_async_err_hook;
95616c82 1885 sk->fd = fd;
05476c4d 1886 if (sk_open(sk) < 0)
95616c82 1887 bug("Netlink: sk_open failed");
95616c82
OZ
1888}
1889
9ddbfbdd 1890
95616c82
OZ
1891/*
1892 * Interface to the UNIX krt module
1893 */
1894
95616c82 1895void
9ddbfbdd
MM
1896krt_sys_io_init(void)
1897{
2feaa693 1898 nl_linpool = lp_new(krt_pool, 4080);
9ddbfbdd
MM
1899 HASH_INIT(nl_table_map, krt_pool, 6);
1900}
1901
1902int
c6964c30 1903krt_sys_start(struct krt_proto *p)
95616c82 1904{
29a64162 1905 struct krt_proto *old = HASH_FIND(nl_table_map, RTH, p->af, krt_table_id(p));
9ddbfbdd
MM
1906
1907 if (old)
1908 {
1909 log(L_ERR "%s: Kernel table %u already registered by %s",
1910 p->p.name, krt_table_id(p), old->p.name);
1911 return 0;
1912 }
1913
1914 HASH_INSERT2(nl_table_map, RTH, krt_pool, p);
c6964c30
OZ
1915
1916 nl_open();
1917 nl_open_async();
9ddbfbdd
MM
1918
1919 return 1;
95616c82
OZ
1920}
1921
1922void
9ddbfbdd 1923krt_sys_shutdown(struct krt_proto *p)
95616c82 1924{
9ddbfbdd 1925 HASH_REMOVE2(nl_table_map, RTH, krt_pool, p);
95616c82
OZ
1926}
1927
1928int
1929krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
1930{
4adcb9df 1931 return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric);
95616c82
OZ
1932}
1933
95616c82
OZ
1934void
1935krt_sys_init_config(struct krt_config *cf)
1936{
1937 cf->sys.table_id = RT_TABLE_MAIN;
bff21441 1938 cf->sys.metric = 32;
95616c82
OZ
1939}
1940
1941void
1942krt_sys_copy_config(struct krt_config *d, struct krt_config *s)
1943{
1944 d->sys.table_id = s->sys.table_id;
4adcb9df 1945 d->sys.metric = s->sys.metric;
95616c82
OZ
1946}
1947
9fdf9d29
OZ
1948static const char *krt_metrics_names[KRT_METRICS_MAX] = {
1949 NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss",
1950 "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack"
1951};
1952
1953static const char *krt_features_names[KRT_FEATURES_MAX] = {
1954 "ecn", NULL, NULL, "allfrag"
1955};
1956
1957int
1958krt_sys_get_attr(eattr *a, byte *buf, int buflen UNUSED)
1959{
1960 switch (a->id)
1961 {
1962 case EA_KRT_PREFSRC:
1963 bsprintf(buf, "prefsrc");
1964 return GA_NAME;
1965
1966 case EA_KRT_REALM:
1967 bsprintf(buf, "realm");
1968 return GA_NAME;
1969
6e75d0d2
OZ
1970 case EA_KRT_SCOPE:
1971 bsprintf(buf, "scope");
1972 return GA_NAME;
1973
9fdf9d29
OZ
1974 case EA_KRT_LOCK:
1975 buf += bsprintf(buf, "lock:");
1976 ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX);
1977 return GA_FULL;
1978
1979 case EA_KRT_FEATURES:
1980 buf += bsprintf(buf, "features:");
1981 ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX);
1982 return GA_FULL;
1983
1984 default:;
1985 int id = (int)EA_ID(a->id) - KRT_METRICS_OFFSET;
1986 if (id > 0 && id < KRT_METRICS_MAX)
1987 {
1988 bsprintf(buf, "%s", krt_metrics_names[id]);
1989 return GA_NAME;
1990 }
1991
1992 return GA_UNKNOWN;
1993 }
1994}
1995
95616c82
OZ
1996
1997
1998void
1999kif_sys_start(struct kif_proto *p UNUSED)
2000{
2001 nl_open();
2002 nl_open_async();
2003}
2004
2005void
2006kif_sys_shutdown(struct kif_proto *p UNUSED)
2007{
2008}