]> git.ipfire.org Git - thirdparty/bird.git/blame - sysdep/linux/netlink.c
KRT: Fix removal of KRF_INSTALLED
[thirdparty/bird.git] / sysdep / linux / netlink.c
CommitLineData
95616c82
OZ
1/*
2 * BIRD -- Linux Netlink Interface
3 *
4 * (c) 1999--2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
a8caff32 9#include <alloca.h>
95616c82 10#include <stdio.h>
f83ce94d 11#include <unistd.h>
95616c82
OZ
12#include <fcntl.h>
13#include <sys/socket.h>
14#include <sys/uio.h>
15#include <errno.h>
16
17#undef LOCAL_DEBUG
18
19#include "nest/bird.h"
20#include "nest/route.h"
21#include "nest/protocol.h"
22#include "nest/iface.h"
4e276a89 23#include "lib/alloca.h"
7152e5ef
JMM
24#include "sysdep/unix/unix.h"
25#include "sysdep/unix/krt.h"
95616c82
OZ
26#include "lib/socket.h"
27#include "lib/string.h"
9ddbfbdd 28#include "lib/hash.h"
95616c82
OZ
29#include "conf/conf.h"
30
31#include <asm/types.h>
32#include <linux/if.h>
33#include <linux/netlink.h>
34#include <linux/rtnetlink.h>
35
6b0f5f68
MJM
36#ifdef HAVE_MPLS_KERNEL
37#include <linux/lwtunnel.h>
38#endif
9ddbfbdd 39
95616c82
OZ
40#ifndef MSG_TRUNC /* Hack: Several versions of glibc miss this one :( */
41#define MSG_TRUNC 0x20
42#endif
43
a08a81c6
OZ
44#ifndef IFA_FLAGS
45#define IFA_FLAGS 8
46#endif
47
95616c82
OZ
48#ifndef IFF_LOWER_UP
49#define IFF_LOWER_UP 0x10000
50#endif
51
9ddbfbdd
JMM
52#ifndef RTA_TABLE
53#define RTA_TABLE 15
54#endif
55
d14f8c3c
JMM
56#ifndef RTA_VIA
57#define RTA_VIA 18
58#endif
59
60#ifndef RTA_NEWDST
61#define RTA_NEWDST 19
62#endif
63
64#ifndef RTA_ENCAP_TYPE
65#define RTA_ENCAP_TYPE 21
66#endif
67
68#ifndef RTA_ENCAP
69#define RTA_ENCAP 22
70#endif
9ddbfbdd 71
8235c474 72#define krt_ipv4(p) ((p)->af == AF_INET)
cc5b93f7 73#define krt_ecmp6(p) ((p)->af == AF_INET6)
2feaa693 74
517d05df
OZ
75const int rt_default_ecmp = 16;
76
2feaa693
OZ
/*
 * Structure nl_parse_state keeps state of received route processing. Ideally,
 * we could just independently parse received Netlink messages and immediately
 * propagate received routes to the rest of BIRD, but older Linux kernels (before
 * version 4.11) represent and announce IPv6 ECMP routes not as one route with
 * multiple next hops (like RTA_MULTIPATH in IPv4 ECMP), but as a sequence of
 * routes with the same prefix. More recent kernels work as with IPv4.
 *
 * Therefore, BIRD keeps the currently processed route in the nl_parse_state
 * structure and postpones its propagation until we expect it to be final; i.e.,
 * when a non-matching route is received or when the scan ends. When another
 * matching route is received, it is merged with the already processed route to
 * form an ECMP route. Note that merging is done only for IPv6 (merge == 1), but
 * the postponing is done in both cases (for simplicity). All IPv4 routes or IPv6
 * routes with RTA_MULTIPATH set are just considered non-matching.
 *
 * This is ignored for asynchronous notifications (every notification is handled
 * as a separate route). It is not an issue for our routes, as we ignore such
 * notifications anyway. But importing alien IPv6 ECMP routes does not work
 * properly with older kernels.
 *
 * Whatever the kernel version is, IPv6 ECMP routes are sent as multiple routes
 * for the same prefix.
 */
101
102struct nl_parse_state
103{
104 struct linpool *pool;
105 int scan;
106 int merge;
107
108 net *net;
109 rta *attrs;
110 struct krt_proto *proto;
111 s8 new;
112 s8 krt_src;
113 u8 krt_type;
114 u8 krt_proto;
115 u32 krt_metric;
116};
117
95616c82
OZ
118/*
119 * Synchronous Netlink interface
120 */
121
122struct nl_sock
123{
124 int fd;
125 u32 seq;
126 byte *rx_buffer; /* Receive buffer */
127 struct nlmsghdr *last_hdr; /* Recently received packet */
ae80a2de 128 uint last_size;
95616c82
OZ
129};
130
131#define NL_RX_SIZE 8192
132
2feaa693
OZ
133#define NL_OP_DELETE 0
134#define NL_OP_ADD (NLM_F_CREATE|NLM_F_EXCL)
135#define NL_OP_REPLACE (NLM_F_CREATE|NLM_F_REPLACE)
136#define NL_OP_APPEND (NLM_F_CREATE|NLM_F_APPEND)
137
138static linpool *nl_linpool;
139
95616c82
OZ
140static struct nl_sock nl_scan = {.fd = -1}; /* Netlink socket for synchronous scan */
141static struct nl_sock nl_req = {.fd = -1}; /* Netlink socket for requests */
142
143static void
144nl_open_sock(struct nl_sock *nl)
145{
146 if (nl->fd < 0)
147 {
148 nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
149 if (nl->fd < 0)
150 die("Unable to open rtnetlink socket: %m");
574b2324 151 nl->seq = (u32) (current_time() TO_S); /* Or perhaps random_u32() ? */
95616c82
OZ
152 nl->rx_buffer = xmalloc(NL_RX_SIZE);
153 nl->last_hdr = NULL;
154 nl->last_size = 0;
155 }
156}
157
158static void
159nl_open(void)
160{
161 nl_open_sock(&nl_scan);
162 nl_open_sock(&nl_req);
163}
164
165static void
166nl_send(struct nl_sock *nl, struct nlmsghdr *nh)
167{
168 struct sockaddr_nl sa;
169
170 memset(&sa, 0, sizeof(sa));
171 sa.nl_family = AF_NETLINK;
172 nh->nlmsg_pid = 0;
173 nh->nlmsg_seq = ++(nl->seq);
53401bef 174 nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len);
95616c82
OZ
175 if (sendto(nl->fd, nh, nh->nlmsg_len, 0, (struct sockaddr *)&sa, sizeof(sa)) < 0)
176 die("rtnetlink sendto: %m");
177 nl->last_hdr = NULL;
178}
179
180static void
86c3eea0 181nl_request_dump(int af, int cmd)
95616c82
OZ
182{
183 struct {
184 struct nlmsghdr nh;
185 struct rtgenmsg g;
641172c6
OZ
186 } req = {
187 .nh.nlmsg_type = cmd,
188 .nh.nlmsg_len = sizeof(req),
189 .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
190 .g.rtgen_family = af
191 };
95616c82
OZ
192 nl_send(&nl_scan, &req.nh);
193}
194
195static struct nlmsghdr *
196nl_get_reply(struct nl_sock *nl)
197{
198 for(;;)
199 {
200 if (!nl->last_hdr)
201 {
202 struct iovec iov = { nl->rx_buffer, NL_RX_SIZE };
203 struct sockaddr_nl sa;
31e9e101
ST
204 struct msghdr m = {
205 .msg_name = &sa,
206 .msg_namelen = sizeof(sa),
207 .msg_iov = &iov,
208 .msg_iovlen = 1,
209 };
95616c82
OZ
210 int x = recvmsg(nl->fd, &m, 0);
211 if (x < 0)
212 die("nl_get_reply: %m");
213 if (sa.nl_pid) /* It isn't from the kernel */
214 {
215 DBG("Non-kernel packet\n");
216 continue;
217 }
218 nl->last_size = x;
219 nl->last_hdr = (void *) nl->rx_buffer;
220 if (m.msg_flags & MSG_TRUNC)
221 bug("nl_get_reply: got truncated reply which should be impossible");
222 }
223 if (NLMSG_OK(nl->last_hdr, nl->last_size))
224 {
225 struct nlmsghdr *h = nl->last_hdr;
226 nl->last_hdr = NLMSG_NEXT(h, nl->last_size);
227 if (h->nlmsg_seq != nl->seq)
228 {
229 log(L_WARN "nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)",
230 h->nlmsg_seq, nl->seq);
231 continue;
232 }
233 return h;
234 }
235 if (nl->last_size)
236 log(L_WARN "nl_get_reply: Found packet remnant of size %d", nl->last_size);
237 nl->last_hdr = NULL;
238 }
239}
240
1123e707 241static struct tbf rl_netlink_err = TBF_DEFAULT_LOG_LIMITS;
95616c82
OZ
242
243static int
2feaa693 244nl_error(struct nlmsghdr *h, int ignore_esrch)
95616c82
OZ
245{
246 struct nlmsgerr *e;
247 int ec;
248
249 if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr)))
250 {
251 log(L_WARN "Netlink: Truncated error message received");
252 return ENOBUFS;
253 }
254 e = (struct nlmsgerr *) NLMSG_DATA(h);
255 ec = -e->error;
2feaa693 256 if (ec && !(ignore_esrch && (ec == ESRCH)))
95616c82
OZ
257 log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec));
258 return ec;
259}
260
261static struct nlmsghdr *
262nl_get_scan(void)
263{
264 struct nlmsghdr *h = nl_get_reply(&nl_scan);
265
266 if (h->nlmsg_type == NLMSG_DONE)
267 return NULL;
268 if (h->nlmsg_type == NLMSG_ERROR)
269 {
2feaa693 270 nl_error(h, 0);
95616c82
OZ
271 return NULL;
272 }
273 return h;
274}
275
276static int
2feaa693 277nl_exchange(struct nlmsghdr *pkt, int ignore_esrch)
95616c82
OZ
278{
279 struct nlmsghdr *h;
280
281 nl_send(&nl_req, pkt);
282 for(;;)
283 {
284 h = nl_get_reply(&nl_req);
285 if (h->nlmsg_type == NLMSG_ERROR)
286 break;
287 log(L_WARN "nl_exchange: Unexpected reply received");
288 }
2feaa693 289 return nl_error(h, ignore_esrch) ? -1 : 0;
95616c82
OZ
290}
291
292/*
293 * Netlink attributes
294 */
295
296static int nl_attr_len;
297
298static void *
299nl_checkin(struct nlmsghdr *h, int lsize)
300{
301 nl_attr_len = h->nlmsg_len - NLMSG_LENGTH(lsize);
302 if (nl_attr_len < 0)
303 {
304 log(L_ERR "nl_checkin: underrun by %d bytes", -nl_attr_len);
305 return NULL;
306 }
307 return NLMSG_DATA(h);
308}
309
ad276157
JMM
310struct nl_want_attrs {
311 u8 defined:1;
312 u8 checksize:1;
313 u8 size;
314};
315
316
317#define BIRD_IFLA_MAX (IFLA_WIRELESS+1)
318
319static struct nl_want_attrs ifla_attr_want[BIRD_IFLA_MAX] = {
320 [IFLA_IFNAME] = { 1, 0, 0 },
321 [IFLA_MTU] = { 1, 1, sizeof(u32) },
943478b0 322 [IFLA_MASTER] = { 1, 1, sizeof(u32) },
ad276157
JMM
323 [IFLA_WIRELESS] = { 1, 0, 0 },
324};
325
29a64162 326
e37d2e3e 327#define BIRD_IFA_MAX (IFA_FLAGS+1)
ad276157 328
ad276157
JMM
329static struct nl_want_attrs ifa_attr_want4[BIRD_IFA_MAX] = {
330 [IFA_ADDRESS] = { 1, 1, sizeof(ip4_addr) },
331 [IFA_LOCAL] = { 1, 1, sizeof(ip4_addr) },
332 [IFA_BROADCAST] = { 1, 1, sizeof(ip4_addr) },
62e64905 333 [IFA_FLAGS] = { 1, 1, sizeof(u32) },
ad276157 334};
29a64162 335
ad276157
JMM
336static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = {
337 [IFA_ADDRESS] = { 1, 1, sizeof(ip6_addr) },
338 [IFA_LOCAL] = { 1, 1, sizeof(ip6_addr) },
e37d2e3e 339 [IFA_FLAGS] = { 1, 1, sizeof(u32) },
ad276157 340};
29a64162 341
ad276157 342
d14f8c3c 343#define BIRD_RTA_MAX (RTA_ENCAP+1)
ad276157 344
4e276a89 345static struct nl_want_attrs nexthop_attr_want4[BIRD_RTA_MAX] = {
ad276157 346 [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
53401bef 347 [RTA_VIA] = { 1, 0, 0 },
d14f8c3c
JMM
348 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
349 [RTA_ENCAP] = { 1, 0, 0 },
350};
351
4ff15a75 352static struct nl_want_attrs nexthop_attr_want6[BIRD_RTA_MAX] = {
98bb80a2 353 [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) },
53401bef 354 [RTA_VIA] = { 1, 0, 0 },
4ff15a75
OZ
355 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
356 [RTA_ENCAP] = { 1, 0, 0 },
357};
358
6b0f5f68 359#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
360static struct nl_want_attrs encap_mpls_want[BIRD_RTA_MAX] = {
361 [RTA_DST] = { 1, 0, 0 },
ad276157 362};
6b0f5f68 363#endif
ad276157 364
ad276157
JMM
365static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = {
366 [RTA_DST] = { 1, 1, sizeof(ip4_addr) },
367 [RTA_OIF] = { 1, 1, sizeof(u32) },
368 [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
369 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
370 [RTA_PREFSRC] = { 1, 1, sizeof(ip4_addr) },
371 [RTA_METRICS] = { 1, 0, 0 },
372 [RTA_MULTIPATH] = { 1, 0, 0 },
373 [RTA_FLOW] = { 1, 1, sizeof(u32) },
374 [RTA_TABLE] = { 1, 1, sizeof(u32) },
53401bef 375 [RTA_VIA] = { 1, 0, 0 },
d14f8c3c
JMM
376 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
377 [RTA_ENCAP] = { 1, 0, 0 },
ad276157 378};
29a64162 379
ad276157
JMM
380static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = {
381 [RTA_DST] = { 1, 1, sizeof(ip6_addr) },
be17805c 382 [RTA_SRC] = { 1, 1, sizeof(ip6_addr) },
ad276157
JMM
383 [RTA_IIF] = { 1, 1, sizeof(u32) },
384 [RTA_OIF] = { 1, 1, sizeof(u32) },
385 [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) },
386 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
387 [RTA_PREFSRC] = { 1, 1, sizeof(ip6_addr) },
388 [RTA_METRICS] = { 1, 0, 0 },
98bb80a2 389 [RTA_MULTIPATH] = { 1, 0, 0 },
ad276157
JMM
390 [RTA_FLOW] = { 1, 1, sizeof(u32) },
391 [RTA_TABLE] = { 1, 1, sizeof(u32) },
53401bef 392 [RTA_VIA] = { 1, 0, 0 },
d14f8c3c
JMM
393 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
394 [RTA_ENCAP] = { 1, 0, 0 },
395};
396
6b0f5f68 397#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
398static struct nl_want_attrs rtm_attr_want_mpls[BIRD_RTA_MAX] = {
399 [RTA_DST] = { 1, 1, sizeof(u32) },
400 [RTA_IIF] = { 1, 1, sizeof(u32) },
401 [RTA_OIF] = { 1, 1, sizeof(u32) },
402 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
403 [RTA_METRICS] = { 1, 0, 0 },
404 [RTA_FLOW] = { 1, 1, sizeof(u32) },
405 [RTA_TABLE] = { 1, 1, sizeof(u32) },
406 [RTA_VIA] = { 1, 0, 0 },
407 [RTA_NEWDST] = { 1, 0, 0 },
ad276157 408};
6b0f5f68 409#endif
ad276157
JMM
410
411
95616c82 412static int
ad276157 413nl_parse_attrs(struct rtattr *a, struct nl_want_attrs *want, struct rtattr **k, int ksize)
95616c82
OZ
414{
415 int max = ksize / sizeof(struct rtattr *);
416 bzero(k, ksize);
ad276157
JMM
417
418 for ( ; RTA_OK(a, nl_attr_len); a = RTA_NEXT(a, nl_attr_len))
95616c82 419 {
ad276157
JMM
420 if ((a->rta_type >= max) || !want[a->rta_type].defined)
421 continue;
422
423 if (want[a->rta_type].checksize && (RTA_PAYLOAD(a) != want[a->rta_type].size))
424 {
9b136840 425 log(L_ERR "nl_parse_attrs: Malformed attribute received");
ad276157
JMM
426 return 0;
427 }
428
429 k[a->rta_type] = a;
95616c82 430 }
ad276157 431
95616c82
OZ
432 if (nl_attr_len)
433 {
434 log(L_ERR "nl_parse_attrs: remnant of size %d", nl_attr_len);
435 return 0;
436 }
ad276157
JMM
437
438 return 1;
95616c82
OZ
439}
440
d14f8c3c
JMM
441static inline u16 rta_get_u16(struct rtattr *a)
442{ return *(u16 *) RTA_DATA(a); }
443
fce764f9 444static inline u32 rta_get_u32(struct rtattr *a)
acb04cfd
OZ
445{ return *(u32 *) RTA_DATA(a); }
446
447static inline ip4_addr rta_get_ip4(struct rtattr *a)
448{ return ip4_ntoh(*(ip4_addr *) RTA_DATA(a)); }
449
450static inline ip6_addr rta_get_ip6(struct rtattr *a)
451{ return ip6_ntoh(*(ip6_addr *) RTA_DATA(a)); }
452
9b136840
JMM
453static inline ip_addr rta_get_ipa(struct rtattr *a)
454{
455 if (RTA_PAYLOAD(a) == sizeof(ip4_addr))
456 return ipa_from_ip4(rta_get_ip4(a));
457 else
458 return ipa_from_ip6(rta_get_ip6(a));
459}
acb04cfd 460
6b0f5f68 461#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
462static inline ip_addr rta_get_via(struct rtattr *a)
463{
464 struct rtvia *v = RTA_DATA(a);
465 switch(v->rtvia_family) {
466 case AF_INET: return ipa_from_ip4(ip4_ntoh(*(ip4_addr *) v->rtvia_addr));
467 case AF_INET6: return ipa_from_ip6(ip6_ntoh(*(ip6_addr *) v->rtvia_addr));
468 }
469 return IPA_NONE;
470}
471
472static u32 rta_mpls_stack[MPLS_MAX_LABEL_STACK];
473static inline int rta_get_mpls(struct rtattr *a, u32 *stack)
474{
2eaf65ec
OZ
475 if (!a)
476 return 0;
477
d14f8c3c
JMM
478 if (RTA_PAYLOAD(a) % 4)
479 log(L_WARN "KRT: Strange length of received MPLS stack: %u", RTA_PAYLOAD(a));
480
2eaf65ec
OZ
481 int labels = mpls_get(RTA_DATA(a), RTA_PAYLOAD(a) & ~0x3, stack);
482
483 if (labels < 0)
484 {
485 log(L_WARN "KRT: Too long MPLS stack received, ignoring");
486 labels = 0;
487 }
488
489 return labels;
d14f8c3c 490}
6b0f5f68 491#endif
d14f8c3c 492
9fdf9d29
OZ
493struct rtattr *
494nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint dlen)
95616c82 495{
9fdf9d29
OZ
496 uint pos = NLMSG_ALIGN(h->nlmsg_len);
497 uint len = RTA_LENGTH(dlen);
95616c82
OZ
498
499 if (pos + len > bufsize)
500 bug("nl_add_attr: packet buffer overflow");
9fdf9d29
OZ
501
502 struct rtattr *a = (struct rtattr *)((char *)h + pos);
95616c82
OZ
503 a->rta_type = code;
504 a->rta_len = len;
505 h->nlmsg_len = pos + len;
9fdf9d29
OZ
506
507 if (dlen > 0)
508 memcpy(RTA_DATA(a), data, dlen);
509
510 return a;
95616c82
OZ
511}
512
d14f8c3c
JMM
513static inline struct rtattr *
514nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code)
515{
516 return nl_add_attr(h, bufsize, code, NULL, 0);
517}
518
/*
 * Finish the nested attribute @a previously started in message @h: set its
 * length so that it spans from its own header up to the current (aligned)
 * end of the message.
 *
 * Byte offsets are computed on char pointers, as in nl_add_attr();
 * arithmetic on void pointers is a GNU extension, not ISO C.
 */
static inline void
nl_close_attr(struct nlmsghdr *h, struct rtattr *a)
{
  char *msg_end = (char *) h + NLMSG_ALIGN(h->nlmsg_len);

  a->rta_len = msg_end - (char *) a;
}
524
525static inline void
526nl_add_attr_u16(struct nlmsghdr *h, uint bufsize, int code, u16 data)
527{
528 nl_add_attr(h, bufsize, code, &data, 2);
529}
530
95616c82 531static inline void
29a64162 532nl_add_attr_u32(struct nlmsghdr *h, uint bufsize, int code, u32 data)
95616c82
OZ
533{
534 nl_add_attr(h, bufsize, code, &data, 4);
535}
536
537static inline void
29a64162 538nl_add_attr_ip4(struct nlmsghdr *h, uint bufsize, int code, ip4_addr ip4)
95616c82 539{
29a64162
OZ
540 ip4 = ip4_hton(ip4);
541 nl_add_attr(h, bufsize, code, &ip4, sizeof(ip4));
542}
543
544static inline void
545nl_add_attr_ip6(struct nlmsghdr *h, uint bufsize, int code, ip6_addr ip6)
546{
547 ip6 = ip6_hton(ip6);
548 nl_add_attr(h, bufsize, code, &ip6, sizeof(ip6));
549}
550
551static inline void
552nl_add_attr_ipa(struct nlmsghdr *h, uint bufsize, int code, ip_addr ipa)
553{
554 if (ipa_is_ip4(ipa))
555 nl_add_attr_ip4(h, bufsize, code, ipa_to_ip4(ipa));
9b136840 556 else
29a64162 557 nl_add_attr_ip6(h, bufsize, code, ipa_to_ip6(ipa));
95616c82
OZ
558}
559
6b0f5f68 560#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
561static inline void
562nl_add_attr_mpls(struct nlmsghdr *h, uint bufsize, int code, int len, u32 *stack)
9fdf9d29 563{
d14f8c3c
JMM
564 char buf[len*4];
565 mpls_put(buf, len, stack);
566 nl_add_attr(h, bufsize, code, buf, len*4);
9fdf9d29 567}
95616c82
OZ
568
569static inline void
d14f8c3c 570nl_add_attr_mpls_encap(struct nlmsghdr *h, uint bufsize, int len, u32 *stack)
95616c82 571{
d14f8c3c
JMM
572 nl_add_attr_u16(h, bufsize, RTA_ENCAP_TYPE, LWTUNNEL_ENCAP_MPLS);
573
574 struct rtattr *nest = nl_open_attr(h, bufsize, RTA_ENCAP);
575 nl_add_attr_mpls(h, bufsize, RTA_DST, len, stack);
576 nl_close_attr(h, nest);
577}
578
579static inline void
580nl_add_attr_via(struct nlmsghdr *h, uint bufsize, ip_addr ipa)
581{
66acbc8d 582 struct rtvia *via = alloca(sizeof(struct rtvia) + 16);
d14f8c3c 583
62e64905
OZ
584 if (ipa_is_ip4(ipa))
585 {
d14f8c3c 586 via->rtvia_family = AF_INET;
62e64905 587 put_ip4(via->rtvia_addr, ipa_to_ip4(ipa));
66acbc8d 588 nl_add_attr(h, bufsize, RTA_VIA, via, sizeof(struct rtvia) + 4);
62e64905
OZ
589 }
590 else
591 {
d14f8c3c 592 via->rtvia_family = AF_INET6;
62e64905 593 put_ip6(via->rtvia_addr, ipa_to_ip6(ipa));
66acbc8d 594 nl_add_attr(h, bufsize, RTA_VIA, via, sizeof(struct rtvia) + 16);
d14f8c3c 595 }
95616c82 596}
6b0f5f68 597#endif
95616c82 598
9fdf9d29
OZ
599static inline struct rtnexthop *
600nl_open_nexthop(struct nlmsghdr *h, uint bufsize)
601{
602 uint pos = NLMSG_ALIGN(h->nlmsg_len);
603 uint len = RTNH_LENGTH(0);
604
605 if (pos + len > bufsize)
606 bug("nl_open_nexthop: packet buffer overflow");
607
608 h->nlmsg_len = pos + len;
609
610 return (void *)h + pos;
611}
612
/*
 * Finish the nexthop record @nh previously opened in message @h: set its
 * length so that it spans from its own header up to the current (aligned)
 * end of the message.
 *
 * Byte offsets are computed on char pointers, as in nl_add_attr();
 * arithmetic on void pointers is a GNU extension, not ISO C.
 */
static inline void
nl_close_nexthop(struct nlmsghdr *h, struct rtnexthop *nh)
{
  char *msg_end = (char *) h + NLMSG_ALIGN(h->nlmsg_len);

  nh->rtnh_len = msg_end - (char *) nh;
}
95616c82 618
d14f8c3c 619static inline void
6b0f5f68 620nl_add_nexthop(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af UNUSED)
d14f8c3c 621{
6b0f5f68 622#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
623 if (nh->labels > 0)
624 if (af == AF_MPLS)
625 nl_add_attr_mpls(h, bufsize, RTA_NEWDST, nh->labels, nh->label);
626 else
627 nl_add_attr_mpls_encap(h, bufsize, nh->labels, nh->label);
628
629 if (ipa_nonzero(nh->gw))
53401bef
OZ
630 {
631 if (af == (ipa_is_ip4(nh->gw) ? AF_INET : AF_INET6))
d14f8c3c 632 nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
53401bef
OZ
633 else
634 nl_add_attr_via(h, bufsize, nh->gw);
635 }
6b0f5f68
MJM
636#else
637
638 if (ipa_nonzero(nh->gw))
639 nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
640#endif
d14f8c3c
JMM
641}
642
95616c82 643static void
d14f8c3c 644nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af)
95616c82 645{
9fdf9d29
OZ
646 struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH);
647
95616c82 648 for (; nh; nh = nh->next)
9fdf9d29
OZ
649 {
650 struct rtnexthop *rtnh = nl_open_nexthop(h, bufsize);
95616c82 651
9fdf9d29
OZ
652 rtnh->rtnh_flags = 0;
653 rtnh->rtnh_hops = nh->weight;
654 rtnh->rtnh_ifindex = nh->iface->index;
95616c82 655
d14f8c3c 656 nl_add_nexthop(h, bufsize, nh, af);
95616c82 657
a1f5e514
OZ
658 if (nh->flags & RNF_ONLINK)
659 rtnh->rtnh_flags |= RTNH_F_ONLINK;
660
9fdf9d29
OZ
661 nl_close_nexthop(h, rtnh);
662 }
663
664 nl_close_attr(h, a);
665}
95616c82 666
4e276a89 667static struct nexthop *
3e792350 668nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr *ra, int af)
95616c82 669{
ad276157 670 struct rtattr *a[BIRD_RTA_MAX];
95616c82 671 struct rtnexthop *nh = RTA_DATA(ra);
4e276a89 672 struct nexthop *rv, *first, **last;
3e236955 673 unsigned len = RTA_PAYLOAD(ra);
95616c82
OZ
674
675 first = NULL;
676 last = &first;
95616c82
OZ
677
678 while (len)
679 {
680 /* Use RTNH_OK(nh,len) ?? */
681 if ((len < sizeof(*nh)) || (len < nh->rtnh_len))
682 return NULL;
683
3e792350 684 *last = rv = lp_allocz(s->pool, NEXTHOP_MAX_SIZE);
95616c82
OZ
685 last = &(rv->next);
686
687 rv->weight = nh->rtnh_hops;
688 rv->iface = if_find_by_index(nh->rtnh_ifindex);
689 if (!rv->iface)
690 return NULL;
691
692 /* Nonexistent RTNH_PAYLOAD ?? */
693 nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0);
98bb80a2
OZ
694 switch (af)
695 {
98bb80a2 696 case AF_INET:
4ff15a75 697 if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want4, a, sizeof(a)))
98bb80a2
OZ
698 return NULL;
699 break;
4ff15a75 700
98bb80a2 701 case AF_INET6:
4ff15a75 702 if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want6, a, sizeof(a)))
98bb80a2
OZ
703 return NULL;
704 break;
4ff15a75 705
98bb80a2
OZ
706 default:
707 return NULL;
708 }
709
95616c82 710 if (a[RTA_GATEWAY])
53401bef 711 rv->gw = rta_get_ipa(a[RTA_GATEWAY]);
95616c82 712
53401bef
OZ
713#ifdef HAVE_MPLS_KERNEL
714 if (a[RTA_VIA])
715 rv->gw = rta_get_via(a[RTA_VIA]);
716#endif
717
718 if (ipa_nonzero(rv->gw))
719 {
a1f5e514
OZ
720 if (nh->rtnh_flags & RTNH_F_ONLINK)
721 rv->flags |= RNF_ONLINK;
722
23c212e7 723 neighbor *nbr;
586c1800
OZ
724 nbr = neigh_find(&p->p, rv->gw, rv->iface,
725 (rv->flags & RNF_ONLINK) ? NEF_ONLINK : 0);
23c212e7 726 if (!nbr || (nbr->scope == SCOPE_HOST))
95616c82
OZ
727 return NULL;
728 }
62e64905 729
6b0f5f68 730#ifdef HAVE_MPLS_KERNEL
2eaf65ec 731 if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE])
6b0f5f68
MJM
732 {
733 if (rta_get_u16(a[RTA_ENCAP_TYPE]) != LWTUNNEL_ENCAP_MPLS) {
734 log(L_WARN "KRT: Unknown encapsulation method %d in multipath", rta_get_u16(a[RTA_ENCAP_TYPE]));
735 return NULL;
d14f8c3c
JMM
736 }
737
6b0f5f68
MJM
738 struct rtattr *enca[BIRD_RTA_MAX];
739 nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
740 nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
741 rv->labels = rta_get_mpls(enca[RTA_DST], rv->label);
6b0f5f68
MJM
742 }
743#endif
744
95616c82
OZ
745
746 len -= NLMSG_ALIGN(nh->rtnh_len);
747 nh = RTNH_NEXT(nh);
748 }
749
59d3a361
OZ
750 /* Ensure nexthops are sorted to satisfy nest invariant */
751 if (!nexthop_is_sorted(first))
752 first = nexthop_sort(first);
753
95616c82
OZ
754 return first;
755}
756
9fdf9d29
OZ
757static void
758nl_add_metrics(struct nlmsghdr *h, uint bufsize, u32 *metrics, int max)
759{
760 struct rtattr *a = nl_open_attr(h, bufsize, RTA_METRICS);
761 int t;
762
763 for (t = 1; t < max; t++)
764 if (metrics[0] & (1 << t))
765 nl_add_attr_u32(h, bufsize, t, metrics[t]);
766
767 nl_close_attr(h, a);
768}
769
770static int
771nl_parse_metrics(struct rtattr *hdr, u32 *metrics, int max)
772{
773 struct rtattr *a = RTA_DATA(hdr);
774 int len = RTA_PAYLOAD(hdr);
775
776 metrics[0] = 0;
777 for (; RTA_OK(a, len); a = RTA_NEXT(a, len))
778 {
779 if (a->rta_type == RTA_UNSPEC)
780 continue;
781
782 if (a->rta_type >= max)
783 continue;
784
785 if (RTA_PAYLOAD(a) != 4)
786 return -1;
787
788 metrics[0] |= 1 << a->rta_type;
acb04cfd 789 metrics[a->rta_type] = rta_get_u32(a);
9fdf9d29
OZ
790 }
791
792 if (len > 0)
793 return -1;
794
795 return 0;
796}
797
95616c82
OZ
798
799/*
800 * Scanning of interfaces
801 */
802
803static void
804nl_parse_link(struct nlmsghdr *h, int scan)
805{
806 struct ifinfomsg *i;
ad276157 807 struct rtattr *a[BIRD_IFLA_MAX];
95616c82
OZ
808 int new = h->nlmsg_type == RTM_NEWLINK;
809 struct iface f = {};
810 struct iface *ifi;
811 char *name;
943478b0 812 u32 mtu, master = 0;
ae80a2de 813 uint fl;
95616c82 814
ad276157 815 if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFLA_RTA(i), ifla_attr_want, a, sizeof(a)))
95616c82 816 return;
ad276157 817 if (!a[IFLA_IFNAME] || (RTA_PAYLOAD(a[IFLA_IFNAME]) < 2) || !a[IFLA_MTU])
95616c82 818 {
ad276157
JMM
819 /*
820 * IFLA_IFNAME and IFLA_MTU are required, in fact, but there may also come
821 * a message with IFLA_WIRELESS set, where (e.g.) no IFLA_IFNAME exists.
822 * We simply ignore all such messages with IFLA_WIRELESS without notice.
823 */
824
825 if (a[IFLA_WIRELESS])
826 return;
827
828 log(L_ERR "KIF: Malformed message received");
95616c82
OZ
829 return;
830 }
ad276157 831
95616c82 832 name = RTA_DATA(a[IFLA_IFNAME]);
acb04cfd 833 mtu = rta_get_u32(a[IFLA_MTU]);
95616c82 834
943478b0
OZ
835 if (a[IFLA_MASTER])
836 master = rta_get_u32(a[IFLA_MASTER]);
837
95616c82
OZ
838 ifi = if_find_by_index(i->ifi_index);
839 if (!new)
840 {
841 DBG("KIF: IF%d(%s) goes down\n", i->ifi_index, name);
842 if (!ifi)
843 return;
844
845 if_delete(ifi);
846 }
847 else
848 {
849 DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i->ifi_index, name, mtu, i->ifi_flags);
850 if (ifi && strncmp(ifi->name, name, sizeof(ifi->name)-1))
851 if_delete(ifi);
852
853 strncpy(f.name, name, sizeof(f.name)-1);
854 f.index = i->ifi_index;
855 f.mtu = mtu;
856
943478b0
OZ
857 f.master_index = master;
858 f.master = if_find_by_index(master);
859
95616c82
OZ
860 fl = i->ifi_flags;
861 if (fl & IFF_UP)
862 f.flags |= IF_ADMIN_UP;
863 if (fl & IFF_LOWER_UP)
864 f.flags |= IF_LINK_UP;
865 if (fl & IFF_LOOPBACK) /* Loopback */
866 f.flags |= IF_MULTIACCESS | IF_LOOPBACK | IF_IGNORE;
867 else if (fl & IFF_POINTOPOINT) /* PtP */
868 f.flags |= IF_MULTICAST;
869 else if (fl & IFF_BROADCAST) /* Broadcast */
870 f.flags |= IF_MULTIACCESS | IF_BROADCAST | IF_MULTICAST;
871 else
872 f.flags |= IF_MULTIACCESS; /* NBMA */
3216eb03 873
16a3254c
OZ
874 if (fl & IFF_MULTICAST)
875 f.flags |= IF_MULTICAST;
876
3216eb03
OZ
877 ifi = if_update(&f);
878
879 if (!scan)
880 if_end_partial_update(ifi);
95616c82
OZ
881 }
882}
883
884static void
9b136840 885nl_parse_addr4(struct ifaddrmsg *i, int scan, int new)
95616c82 886{
ad276157 887 struct rtattr *a[BIRD_IFA_MAX];
95616c82 888 struct iface *ifi;
e37d2e3e 889 u32 ifa_flags;
95616c82
OZ
890 int scope;
891
9b136840 892 if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a)))
95616c82 893 return;
ad276157 894
9b136840 895 if (!a[IFA_LOCAL])
ad276157 896 {
9b136840
JMM
897 log(L_ERR "KIF: Malformed message received (missing IFA_LOCAL)");
898 return;
ad276157 899 }
ad276157 900 if (!a[IFA_ADDRESS])
95616c82 901 {
ad276157 902 log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
95616c82
OZ
903 return;
904 }
905
906 ifi = if_find_by_index(i->ifa_index);
907 if (!ifi)
908 {
909 log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
910 return;
911 }
912
e37d2e3e
OZ
913 if (a[IFA_FLAGS])
914 ifa_flags = rta_get_u32(a[IFA_FLAGS]);
915 else
916 ifa_flags = i->ifa_flags;
917
9b136840 918 struct ifa ifa;
95616c82
OZ
919 bzero(&ifa, sizeof(ifa));
920 ifa.iface = ifi;
cc5b93f7 921 if (ifa_flags & IFA_F_SECONDARY)
95616c82
OZ
922 ifa.flags |= IA_SECONDARY;
923
9b136840
JMM
924 ifa.ip = rta_get_ipa(a[IFA_LOCAL]);
925
d7661fbe 926 if (i->ifa_prefixlen > IP4_MAX_PREFIX_LENGTH)
95616c82
OZ
927 {
928 log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
929 new = 0;
930 }
d7661fbe 931 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH)
95616c82 932 {
9b136840
JMM
933 ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
934 net_fill_ip4(&ifa.prefix, rta_get_ip4(a[IFA_ADDRESS]), i->ifa_prefixlen);
95616c82
OZ
935
936 /* It is either a host address or a peer address */
9b136840 937 if (ipa_equal(ifa.ip, ifa.brd))
95616c82
OZ
938 ifa.flags |= IA_HOST;
939 else
940 {
941 ifa.flags |= IA_PEER;
9b136840 942 ifa.opposite = ifa.brd;
95616c82
OZ
943 }
944 }
945 else
946 {
9b136840
JMM
947 net_fill_ip4(&ifa.prefix, ipa_to_ip4(ifa.ip), i->ifa_prefixlen);
948 net_normalize(&ifa.prefix);
949
d7661fbe 950 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 1)
95616c82
OZ
951 ifa.opposite = ipa_opposite_m1(ifa.ip);
952
d7661fbe 953 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 2)
95616c82
OZ
954 ifa.opposite = ipa_opposite_m2(ifa.ip);
955
956 if ((ifi->flags & IF_BROADCAST) && a[IFA_BROADCAST])
957 {
9b136840
JMM
958 ip4_addr xbrd = rta_get_ip4(a[IFA_BROADCAST]);
959 ip4_addr ybrd = ip4_or(ipa_to_ip4(ifa.ip), ip4_not(ip4_mkmask(i->ifa_prefixlen)));
960
961 if (ip4_equal(xbrd, net4_prefix(&ifa.prefix)) || ip4_equal(xbrd, ybrd))
962 ifa.brd = ipa_from_ip4(xbrd);
95616c82 963 else if (ifi->flags & IF_TMP_DOWN) /* Complain only during the first scan */
9b136840 964 {
e691d16a 965 log(L_ERR "KIF: Invalid broadcast address %I4 for %s", xbrd, ifi->name);
9b136840
JMM
966 ifa.brd = ipa_from_ip4(ybrd);
967 }
968 }
969 }
970
971 scope = ipa_classify(ifa.ip);
972 if (scope < 0)
973 {
974 log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
975 return;
976 }
977 ifa.scope = scope & IADDR_SCOPE_MASK;
978
979 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
980 ifi->index, ifi->name,
981 new ? "added" : "removed",
4659b2ae 982 ifa.ip, ifa.flags, &ifa.prefix, ifa.brd, ifa.opposite);
9b136840
JMM
983
984 if (new)
985 ifa_update(&ifa);
986 else
987 ifa_delete(&ifa);
988
989 if (!scan)
990 if_end_partial_update(ifi);
991}
992
993static void
994nl_parse_addr6(struct ifaddrmsg *i, int scan, int new)
995{
996 struct rtattr *a[BIRD_IFA_MAX];
997 struct iface *ifi;
cc5b93f7 998 u32 ifa_flags;
9b136840
JMM
999 int scope;
1000
1001 if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a)))
1002 return;
1003
1004 if (!a[IFA_ADDRESS])
1005 {
1006 log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
1007 return;
1008 }
1009
1010 ifi = if_find_by_index(i->ifa_index);
1011 if (!ifi)
1012 {
1013 log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
1014 return;
1015 }
1016
cc5b93f7
OZ
1017 if (a[IFA_FLAGS])
1018 ifa_flags = rta_get_u32(a[IFA_FLAGS]);
1019 else
1020 ifa_flags = i->ifa_flags;
1021
9b136840
JMM
1022 struct ifa ifa;
1023 bzero(&ifa, sizeof(ifa));
1024 ifa.iface = ifi;
e37d2e3e 1025 if (ifa_flags & IFA_F_SECONDARY)
9b136840
JMM
1026 ifa.flags |= IA_SECONDARY;
1027
e37d2e3e
OZ
1028 /* Ignore tentative addresses silently */
1029 if (ifa_flags & IFA_F_TENTATIVE)
1030 return;
9b136840 1031
95616c82 1032 /* IFA_LOCAL can be unset for IPv6 interfaces */
9b136840
JMM
1033 ifa.ip = rta_get_ipa(a[IFA_LOCAL] ? : a[IFA_ADDRESS]);
1034
d7661fbe 1035 if (i->ifa_prefixlen > IP6_MAX_PREFIX_LENGTH)
9b136840
JMM
1036 {
1037 log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
1038 new = 0;
1039 }
d7661fbe 1040 if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH)
9b136840
JMM
1041 {
1042 ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
1043 net_fill_ip6(&ifa.prefix, rta_get_ip6(a[IFA_ADDRESS]), i->ifa_prefixlen);
1044
1045 /* It is either a host address or a peer address */
1046 if (ipa_equal(ifa.ip, ifa.brd))
1047 ifa.flags |= IA_HOST;
1048 else
1049 {
1050 ifa.flags |= IA_PEER;
1051 ifa.opposite = ifa.brd;
95616c82 1052 }
9b136840
JMM
1053 }
1054 else
1055 {
1056 net_fill_ip6(&ifa.prefix, ipa_to_ip6(ifa.ip), i->ifa_prefixlen);
1057 net_normalize(&ifa.prefix);
1058
d7661fbe 1059 if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH - 1)
9b136840 1060 ifa.opposite = ipa_opposite_m1(ifa.ip);
95616c82
OZ
1061 }
1062
1063 scope = ipa_classify(ifa.ip);
1064 if (scope < 0)
1065 {
1066 log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
1067 return;
1068 }
1069 ifa.scope = scope & IADDR_SCOPE_MASK;
1070
9b136840 1071 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
95616c82
OZ
1072 ifi->index, ifi->name,
1073 new ? "added" : "removed",
4659b2ae 1074 ifa.ip, ifa.flags, &ifa.prefix, ifa.brd, ifa.opposite);
3216eb03 1075
95616c82
OZ
1076 if (new)
1077 ifa_update(&ifa);
1078 else
1079 ifa_delete(&ifa);
3216eb03
OZ
1080
1081 if (!scan)
1082 if_end_partial_update(ifi);
95616c82
OZ
1083}
1084
9b136840
JMM
/*
 * Process one RTM_NEWADDR / RTM_DELADDR message @h by handing it to the
 * parser for its address family. @scan is passed through to the parser.
 * Messages shorter than struct ifaddrmsg and messages of other address
 * families are ignored.
 *
 * The per-family parsers return void, so they are called as plain
 * statements; "return f();" in a void function is not valid ISO C.
 */
static void
nl_parse_addr(struct nlmsghdr *h, int scan)
{
  struct ifaddrmsg *i = nl_checkin(h, sizeof(*i));

  if (!i)
    return;

  int new = (h->nlmsg_type == RTM_NEWADDR);

  switch (i->ifa_family)
  {
  case AF_INET:
    nl_parse_addr4(i, scan, new);
    break;

  case AF_INET6:
    nl_parse_addr6(i, scan, new);
    break;

  default:
    /* Not an address family handled here */
    break;
  }
}
1104
95616c82
OZ
1105void
1106kif_do_scan(struct kif_proto *p UNUSED)
1107{
1108 struct nlmsghdr *h;
1109
1110 if_start_update();
1111
86c3eea0 1112 nl_request_dump(AF_UNSPEC, RTM_GETLINK);
95616c82
OZ
1113 while (h = nl_get_scan())
1114 if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)
1115 nl_parse_link(h, 1);
1116 else
1117 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
29a64162 1118
943478b0
OZ
1119 /* Re-resolve master interface for slaves */
1120 struct iface *i;
1121 WALK_LIST(i, iface_list)
1122 if (i->master_index)
1123 {
1124 struct iface f = {
1125 .flags = i->flags,
1126 .mtu = i->mtu,
1127 .index = i->index,
1128 .master_index = i->master_index,
1129 .master = if_find_by_index(i->master_index)
1130 };
1131
1132 if (f.master != i->master)
1133 {
1134 memcpy(f.name, i->name, sizeof(f.name));
1135 if_update(&f);
1136 }
1137 }
1138
d7661fbe 1139 nl_request_dump(AF_INET, RTM_GETADDR);
95616c82
OZ
1140 while (h = nl_get_scan())
1141 if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
3216eb03 1142 nl_parse_addr(h, 1);
95616c82
OZ
1143 else
1144 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
29a64162 1145
d7661fbe
JMM
1146 nl_request_dump(AF_INET6, RTM_GETADDR);
1147 while (h = nl_get_scan())
1148 if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
1149 nl_parse_addr(h, 1);
1150 else
1151 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
29a64162 1152
95616c82
OZ
1153 if_end_update();
1154}
1155
1156/*
1157 * Routes
1158 */
1159
9ddbfbdd
JMM
1160static inline u32
1161krt_table_id(struct krt_proto *p)
1162{
1163 return KRT_CF->sys.table_id;
1164}
1165
/*
 * Hash table of running krt protocol instances. Entries are looked up by the
 * pair (address family, kernel table id), which lets a received route be
 * matched to the instance that registered that table.
 */
1166static HASH(struct krt_proto) nl_table_map;
1167
29a64162
OZ
/* Key of an entry: its address family and its kernel table id */
1168#define RTH_KEY(p) p->af, krt_table_id(p)
/* Chain link stored inside the protocol's sysdep part */
1169#define RTH_NEXT(p) p->sys.hash_next
/* Two keys are equal when both the family and the table id match */
1170#define RTH_EQ(a1,i1,a2,i2) a1 == a2 && i1 == i2
/* Hash value: family XORed with the hashed table id */
1171#define RTH_FN(a,i) a ^ u32_hash(i)
9ddbfbdd
JMM
1172
/* NOTE(review): RTH_PARAMS is consumed by the generic HASH rehash macros;
   the meaning of the individual values is not visible in this file chunk */
1173#define RTH_REHASH rth_rehash
1174#define RTH_PARAMS /8, *2, 2, 2, 6, 20
1175
1176HASH_DEFINE_REHASH_FN(RTH, struct krt_proto)
95616c82
OZ
1177
1178int
1179krt_capable(rte *e)
1180{
1181 rta *a = e->attrs;
1182
95616c82 1183 switch (a->dest)
62e64905 1184 {
4e276a89 1185 case RTD_UNICAST:
95616c82
OZ
1186 case RTD_BLACKHOLE:
1187 case RTD_UNREACHABLE:
1188 case RTD_PROHIBIT:
62e64905
OZ
1189 return 1;
1190
95616c82
OZ
1191 default:
1192 return 0;
62e64905 1193 }
95616c82
OZ
1194}
1195
1196static inline int
4e276a89 1197nh_bufsize(struct nexthop *nh)
95616c82
OZ
1198{
1199 int rv = 0;
1200 for (; nh != NULL; nh = nh->next)
9fdf9d29 1201 rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr)));
95616c82
OZ
1202 return rv;
1203}
1204
1205static int
13c0be19 1206nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh)
95616c82
OZ
1207{
1208 eattr *ea;
1209 net *net = e->net;
1210 rta *a = e->attrs;
13c0be19 1211 ea_list *eattrs = a->eattrs;
4e276a89 1212 int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(&(a->nh));
4adcb9df 1213 u32 priority = 0;
a8caff32 1214
95616c82
OZ
1215 struct {
1216 struct nlmsghdr h;
1217 struct rtmsg r;
a8caff32
JMM
1218 char buf[0];
1219 } *r;
1220
1221 int rsize = sizeof(*r) + bufsize;
1222 r = alloca(rsize);
95616c82 1223
cc5b93f7 1224 DBG("nl_send_route(%N,op=%x)\n", net->n.addr, op);
95616c82 1225
a8caff32
JMM
1226 bzero(&r->h, sizeof(r->h));
1227 bzero(&r->r, sizeof(r->r));
cc5b93f7 1228 r->h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE;
a8caff32 1229 r->h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
cc5b93f7 1230 r->h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK;
95616c82 1231
a8caff32
JMM
1232 r->r.rtm_family = p->af;
1233 r->r.rtm_dst_len = net_pxlen(net->n.addr);
1234 r->r.rtm_protocol = RTPROT_BIRD;
7074be22 1235 r->r.rtm_scope = RT_SCOPE_NOWHERE;
6b0f5f68 1236#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
1237 if (p->af == AF_MPLS)
1238 {
66acbc8d
OZ
1239 /*
1240 * Kernel MPLS code is a bit picky. We must:
1241 * 1) Always set RT_SCOPE_UNIVERSE and RTN_UNICAST (even for RTM_DELROUTE)
1242 * 2) Never use RTA_PRIORITY
1243 */
1244
d14f8c3c
JMM
1245 u32 label = net_mpls(net->n.addr);
1246 nl_add_attr_mpls(&r->h, rsize, RTA_DST, 1, &label);
66acbc8d
OZ
1247 r->r.rtm_scope = RT_SCOPE_UNIVERSE;
1248 r->r.rtm_type = RTN_UNICAST;
d14f8c3c
JMM
1249 }
1250 else
6b0f5f68 1251#endif
be17805c 1252 {
d14f8c3c 1253 nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(net->n.addr));
95616c82 1254
be17805c
OZ
1255 /* Add source address for IPv6 SADR routes */
1256 if (net->n.addr->type == NET_IP6_SADR)
1257 {
1258 net_addr_ip6_sadr *a = (void *) &net->n.addr;
1259 nl_add_attr_ip6(&r->h, rsize, RTA_SRC, a->src_prefix);
1260 r->r.rtm_src_len = a->src_pxlen;
1261 }
1262 }
1263
2feaa693
OZ
1264 /*
1265 * Strange behavior for RTM_DELROUTE:
1266 * 1) rtm_family is ignored in IPv6, works for IPv4
1267 * 2) not setting RTA_PRIORITY is different from setting default value (on IPv6)
1268 * 3) not setting RTA_PRIORITY is equivalent to setting 0, which is wildcard
1269 */
1270
9ddbfbdd 1271 if (krt_table_id(p) < 256)
a8caff32 1272 r->r.rtm_table = krt_table_id(p);
9ddbfbdd 1273 else
a8caff32 1274 nl_add_attr_u32(&r->h, rsize, RTA_TABLE, krt_table_id(p));
9ddbfbdd 1275
66acbc8d
OZ
1276 if (p->af == AF_MPLS)
1277 priority = 0;
1278 else if (a->source == RTS_DUMMY)
4adcb9df
OZ
1279 priority = e->u.krt.metric;
1280 else if (KRT_CF->sys.metric)
1281 priority = KRT_CF->sys.metric;
1282 else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC)))
1283 priority = ea->u.data;
78a2cc28 1284
4adcb9df 1285 if (priority)
d1b8fe93 1286 nl_add_attr_u32(&r->h, rsize, RTA_PRIORITY, priority);
78a2cc28 1287
2feaa693
OZ
1288 /* For route delete, we do not specify remaining route attributes */
1289 if (op == NL_OP_DELETE)
1290 goto dest;
78a2cc28 1291
6e75d0d2 1292 /* Default scope is LINK for device routes, UNIVERSE otherwise */
66acbc8d
OZ
1293 if (p->af == AF_MPLS)
1294 r->r.rtm_scope = RT_SCOPE_UNIVERSE;
1295 else if (ea = ea_find(eattrs, EA_KRT_SCOPE))
cc5b93f7 1296 r->r.rtm_scope = ea->u.data;
6e75d0d2 1297 else
4e276a89 1298 r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
95616c82
OZ
1299
1300 if (ea = ea_find(eattrs, EA_KRT_PREFSRC))
a8caff32 1301 nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data);
95616c82
OZ
1302
1303 if (ea = ea_find(eattrs, EA_KRT_REALM))
a8caff32 1304 nl_add_attr_u32(&r->h, rsize, RTA_FLOW, ea->u.data);
95616c82 1305
9fdf9d29
OZ
1306
1307 u32 metrics[KRT_METRICS_MAX];
1308 metrics[0] = 0;
1309
1310 struct ea_walk_state ews = { .eattrs = eattrs };
1311 while (ea = ea_walk(&ews, EA_KRT_METRICS, KRT_METRICS_MAX))
1312 {
1313 int id = ea->id - EA_KRT_METRICS;
1314 metrics[0] |= 1 << id;
1315 metrics[id] = ea->u.data;
1316 }
1317
1318 if (metrics[0])
a8caff32 1319 nl_add_metrics(&r->h, rsize, metrics, KRT_METRICS_MAX);
9fdf9d29
OZ
1320
1321
2feaa693 1322dest:
2feaa693 1323 switch (dest)
95616c82 1324 {
4e276a89 1325 case RTD_UNICAST:
a8caff32 1326 r->r.rtm_type = RTN_UNICAST;
4e276a89 1327 if (nh->next && !krt_ecmp6(p))
d14f8c3c 1328 nl_add_multipath(&r->h, rsize, nh, p->af);
4e276a89
JMM
1329 else
1330 {
1331 nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->iface->index);
d14f8c3c 1332 nl_add_nexthop(&r->h, rsize, nh, p->af);
a1f5e514
OZ
1333
1334 if (nh->flags & RNF_ONLINK)
1335 r->r.rtm_flags |= RTNH_F_ONLINK;
4e276a89 1336 }
95616c82
OZ
1337 break;
1338 case RTD_BLACKHOLE:
a8caff32 1339 r->r.rtm_type = RTN_BLACKHOLE;
95616c82
OZ
1340 break;
1341 case RTD_UNREACHABLE:
a8caff32 1342 r->r.rtm_type = RTN_UNREACHABLE;
95616c82
OZ
1343 break;
1344 case RTD_PROHIBIT:
a8caff32 1345 r->r.rtm_type = RTN_PROHIBIT;
95616c82 1346 break;
2feaa693
OZ
1347 case RTD_NONE:
1348 break;
95616c82
OZ
1349 default:
1350 bug("krt_capable inconsistent with nl_send_route");
1351 }
1352
2feaa693 1353 /* Ignore missing for DELETE */
cc5b93f7 1354 return nl_exchange(&r->h, (op == NL_OP_DELETE));
2feaa693
OZ
1355}
1356
1357static inline int
13c0be19 1358nl_add_rte(struct krt_proto *p, rte *e)
2feaa693
OZ
1359{
1360 rta *a = e->attrs;
1361 int err = 0;
1362
4e276a89 1363 if (krt_ecmp6(p) && a->nh.next)
2feaa693 1364 {
4e276a89 1365 struct nexthop *nh = &(a->nh);
2feaa693 1366
13c0be19 1367 err = nl_send_route(p, e, NL_OP_ADD, RTD_UNICAST, nh);
2feaa693
OZ
1368 if (err < 0)
1369 return err;
1370
1371 for (nh = nh->next; nh; nh = nh->next)
13c0be19 1372 err += nl_send_route(p, e, NL_OP_APPEND, RTD_UNICAST, nh);
2feaa693
OZ
1373
1374 return err;
1375 }
1376
13c0be19 1377 return nl_send_route(p, e, NL_OP_ADD, a->dest, &(a->nh));
2feaa693
OZ
1378}
1379
1380static inline int
13c0be19 1381nl_delete_rte(struct krt_proto *p, rte *e)
2feaa693
OZ
1382{
1383 int err = 0;
1384
1385 /* For IPv6, we just repeatedly request DELETE until we get error */
1386 do
13c0be19 1387 err = nl_send_route(p, e, NL_OP_DELETE, RTD_NONE, NULL);
2feaa693
OZ
1388 while (krt_ecmp6(p) && !err);
1389
1390 return err;
95616c82
OZ
1391}
1392
8235c474
OZ
1393static inline int
1394nl_replace_rte(struct krt_proto *p, rte *e)
1395{
1396 rta *a = e->attrs;
1397 return nl_send_route(p, e, NL_OP_REPLACE, a->dest, &(a->nh));
1398}
1399
1400
95616c82 1401void
13c0be19 1402krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old)
95616c82
OZ
1403{
1404 int err = 0;
1405
1406 /*
8235c474
OZ
1407 * We use NL_OP_REPLACE for IPv4, it has an issue with not checking for
1408 * matching rtm_protocol, but that is OK when dedicated priority is used.
2feaa693 1409 *
8235c474
OZ
1410 * We do not use NL_OP_REPLACE for IPv6, as it has broken semantics for ECMP
1411 * and with some kernel versions ECMP replace crashes kernel. Would need more
1412 * testing and checks for kernel versions.
2feaa693 1413 *
8235c474
OZ
1414 * For IPv6, we use NL_OP_DELETE and then NL_OP_ADD. We also do not trust the
1415 * old route value, so we do not try to optimize IPv6 ECMP reconfigurations.
95616c82
OZ
1416 */
1417
8235c474
OZ
1418 if (krt_ipv4(p) && old && new)
1419 {
1420 err = nl_replace_rte(p, new);
1421 }
1422 else
1423 {
1424 if (old)
1425 nl_delete_rte(p, old);
95616c82 1426
8235c474
OZ
1427 if (new)
1428 err = nl_add_rte(p, new);
1429 }
95616c82
OZ
1430
1431 if (err < 0)
1432 n->n.flags |= KRF_SYNC_ERROR;
1433 else
1434 n->n.flags &= ~KRF_SYNC_ERROR;
1435}
1436
2feaa693 1437static int
1187627a 1438nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint priority, uint krt_type, uint rtm_family)
2feaa693 1439{
1187627a
OZ
1440 /* Route merging is used for IPv6 scans */
1441 if (!s->scan || (rtm_family != AF_INET6))
2feaa693
OZ
1442 return 0;
1443
1444 /* Saved and new route must have same network, proto/table, and priority */
1445 if ((s->net != net) || (s->proto != p) || (s->krt_metric != priority))
1446 return 0;
1447
1448 /* Both must be regular unicast routes */
1449 if ((s->krt_type != RTN_UNICAST) || (krt_type != RTN_UNICAST))
1450 return 0;
1451
1452 return 1;
1453}
1454
1455static void
1456nl_announce_route(struct nl_parse_state *s)
1457{
1458 rte *e = rte_get_temp(s->attrs);
1459 e->net = s->net;
1460 e->u.krt.src = s->krt_src;
1461 e->u.krt.proto = s->krt_proto;
1462 e->u.krt.seen = 0;
1463 e->u.krt.best = 0;
1464 e->u.krt.metric = s->krt_metric;
1465
1466 if (s->scan)
1467 krt_got_route(s->proto, e);
1468 else
1469 krt_got_route_async(s->proto, e, s->new);
1470
1471 s->net = NULL;
1472 s->attrs = NULL;
1473 s->proto = NULL;
1474 lp_flush(s->pool);
1475}
1476
1477static inline void
1187627a 1478nl_parse_begin(struct nl_parse_state *s, int scan)
2feaa693
OZ
1479{
1480 memset(s, 0, sizeof (struct nl_parse_state));
1481 s->pool = nl_linpool;
1482 s->scan = scan;
2feaa693
OZ
1483}
1484
1485static inline void
1486nl_parse_end(struct nl_parse_state *s)
1487{
1488 if (s->net)
1489 nl_announce_route(s);
1490}
1491
1492
95616c82
OZ
/*
 * Emit a debug message prefixed with "KRT: Ignoring route - " and return
 * from the calling function. Because of the bare `return;` it is only
 * usable inside functions returning void.
 */
1493#define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
1494
1495static void
2feaa693 1496nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
95616c82
OZ
1497{
1498 struct krt_proto *p;
1499 struct rtmsg *i;
ad276157 1500 struct rtattr *a[BIRD_RTA_MAX];
95616c82
OZ
1501 int new = h->nlmsg_type == RTM_NEWROUTE;
1502
be17805c 1503 net_addr dst, src = {};
95616c82 1504 u32 oif = ~0;
29a64162 1505 u32 table_id;
2feaa693 1506 u32 priority = 0;
6e75d0d2 1507 u32 def_scope = RT_SCOPE_UNIVERSE;
be17805c 1508 int krt_src;
95616c82 1509
ad276157 1510 if (!(i = nl_checkin(h, sizeof(*i))))
95616c82 1511 return;
ad276157
JMM
1512
1513 switch (i->rtm_family)
95616c82 1514 {
29a64162
OZ
1515 case AF_INET:
1516 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want4, a, sizeof(a)))
1517 return;
1518
1519 if (a[RTA_DST])
1520 net_fill_ip4(&dst, rta_get_ip4(a[RTA_DST]), i->rtm_dst_len);
1521 else
1522 net_fill_ip4(&dst, IP4_NONE, 0);
1523 break;
1524
cc5b93f7
OZ
1525 case AF_INET6:
1526 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a)))
1527 return;
29a64162
OZ
1528
1529 if (a[RTA_DST])
1530 net_fill_ip6(&dst, rta_get_ip6(a[RTA_DST]), i->rtm_dst_len);
1531 else
1532 net_fill_ip6(&dst, IP6_NONE, 0);
be17805c
OZ
1533
1534 if (a[RTA_SRC])
1535 net_fill_ip6(&src, rta_get_ip6(a[RTA_SRC]), i->rtm_src_len);
1536 else
1537 net_fill_ip6(&src, IP6_NONE, 0);
29a64162
OZ
1538 break;
1539
6b0f5f68 1540#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
1541 case AF_MPLS:
1542 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want_mpls, a, sizeof(a)))
1543 return;
1544
ed610044
OZ
1545 if (!a[RTA_DST])
1546 SKIP("MPLS route without RTA_DST");
1547
1548 if (rta_get_mpls(a[RTA_DST], rta_mpls_stack) != 1)
1549 SKIP("MPLS route with multi-label RTA_DST");
1550
1551 net_fill_mpls(&dst, rta_mpls_stack[0]);
d14f8c3c 1552 break;
6b0f5f68 1553#endif
d14f8c3c 1554
29a64162
OZ
1555 default:
1556 return;
95616c82
OZ
1557 }
1558
95616c82 1559 if (a[RTA_OIF])
acb04cfd 1560 oif = rta_get_u32(a[RTA_OIF]);
95616c82 1561
9ddbfbdd 1562 if (a[RTA_TABLE])
29a64162 1563 table_id = rta_get_u32(a[RTA_TABLE]);
9ddbfbdd 1564 else
29a64162 1565 table_id = i->rtm_table;
9ddbfbdd 1566
29a64162
OZ
1567 /* Do we know this table? */
1568 p = HASH_FIND(nl_table_map, RTH, i->rtm_family, table_id);
95616c82 1569 if (!p)
4659b2ae 1570 SKIP("unknown table %u\n", table_id);
95616c82 1571
be17805c
OZ
1572 if (a[RTA_SRC] && (p->p.net_type != NET_IP6_SADR))
1573 SKIP("src prefix for non-SADR channel\n");
1574
95616c82
OZ
1575 if (a[RTA_IIF])
1576 SKIP("IIF set\n");
29a64162 1577
95616c82
OZ
1578 if (i->rtm_tos != 0) /* We don't support TOS */
1579 SKIP("TOS %02x\n", i->rtm_tos);
95616c82 1580
2feaa693 1581 if (s->scan && !new)
95616c82
OZ
1582 SKIP("RTM_DELROUTE in scan\n");
1583
2feaa693
OZ
1584 if (a[RTA_PRIORITY])
1585 priority = rta_get_u32(a[RTA_PRIORITY]);
1586
9b136840 1587 int c = net_classify(&dst);
95616c82
OZ
1588 if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
1589 SKIP("strange class/scope\n");
1590
95616c82
OZ
1591 switch (i->rtm_protocol)
1592 {
1593 case RTPROT_UNSPEC:
1594 SKIP("proto unspec\n");
1595
1596 case RTPROT_REDIRECT:
be17805c 1597 krt_src = KRT_SRC_REDIRECT;
95616c82
OZ
1598 break;
1599
1600 case RTPROT_KERNEL:
be17805c 1601 krt_src = KRT_SRC_KERNEL;
95616c82
OZ
1602 return;
1603
1604 case RTPROT_BIRD:
2feaa693 1605 if (!s->scan)
95616c82 1606 SKIP("echo\n");
be17805c 1607 krt_src = KRT_SRC_BIRD;
95616c82
OZ
1608 break;
1609
1610 case RTPROT_BOOT:
1611 default:
be17805c 1612 krt_src = KRT_SRC_ALIEN;
95616c82
OZ
1613 }
1614
be17805c
OZ
1615 net_addr *n = &dst;
1616 if (p->p.net_type == NET_IP6_SADR)
1617 {
1618 n = alloca(sizeof(net_addr_ip6_sadr));
1619 net_fill_ip6_sadr(n, net6_prefix(&dst), net6_pxlen(&dst),
1620 net6_prefix(&src), net6_pxlen(&src));
1621 }
1622
1623 net *net = net_get(p->p.main_channel->table, n);
95616c82 1624
1187627a 1625 if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type, i->rtm_family))
2feaa693
OZ
1626 nl_announce_route(s);
1627
d14f8c3c 1628 rta *ra = lp_allocz(s->pool, RTA_MAX_SIZE);
2feaa693
OZ
1629 ra->src = p->p.main_source;
1630 ra->source = RTS_INHERIT;
1631 ra->scope = SCOPE_UNIVERSE;
95616c82
OZ
1632
1633 switch (i->rtm_type)
1634 {
1635 case RTN_UNICAST:
62e64905 1636 ra->dest = RTD_UNICAST;
95616c82 1637
98bb80a2 1638 if (a[RTA_MULTIPATH])
4ff15a75 1639 {
3e792350 1640 struct nexthop *nh = nl_parse_multipath(s, p, a[RTA_MULTIPATH], i->rtm_family);
4e276a89 1641 if (!nh)
95616c82 1642 {
fe9f1a6d 1643 log(L_ERR "KRT: Received strange multipath route %N", net->n.addr);
95616c82
OZ
1644 return;
1645 }
9fdf9d29 1646
2eaf65ec 1647 nexthop_link(ra, nh);
95616c82
OZ
1648 break;
1649 }
1650
4e276a89
JMM
1651 ra->nh.iface = if_find_by_index(oif);
1652 if (!ra->nh.iface)
95616c82 1653 {
fe9f1a6d 1654 log(L_ERR "KRT: Received route %N with unknown ifindex %u", net->n.addr, oif);
95616c82
OZ
1655 return;
1656 }
1657
53401bef
OZ
1658 if (a[RTA_GATEWAY])
1659 ra->nh.gw = rta_get_ipa(a[RTA_GATEWAY]);
1660
6b0f5f68 1661#ifdef HAVE_MPLS_KERNEL
53401bef
OZ
1662 if (a[RTA_VIA])
1663 ra->nh.gw = rta_get_via(a[RTA_VIA]);
6b0f5f68 1664#endif
95616c82 1665
53401bef
OZ
1666 if (ipa_nonzero(ra->nh.gw))
1667 {
95616c82 1668 /* Silently skip strange 6to4 routes */
0bf95f99 1669 const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96);
4e276a89 1670 if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->nh.gw, (net_addr *) &sit))
95616c82
OZ
1671 return;
1672
a1f5e514
OZ
1673 if (i->rtm_flags & RTNH_F_ONLINK)
1674 ra->nh.flags |= RNF_ONLINK;
1675
23c212e7 1676 neighbor *nbr;
586c1800
OZ
1677 nbr = neigh_find(&p->p, ra->nh.gw, ra->nh.iface,
1678 (ra->nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0);
23c212e7 1679 if (!nbr || (nbr->scope == SCOPE_HOST))
95616c82 1680 {
4e276a89
JMM
1681 log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr,
1682 ra->nh.gw);
95616c82
OZ
1683 return;
1684 }
1685 }
95616c82
OZ
1686
1687 break;
1688 case RTN_BLACKHOLE:
2feaa693 1689 ra->dest = RTD_BLACKHOLE;
95616c82
OZ
1690 break;
1691 case RTN_UNREACHABLE:
2feaa693 1692 ra->dest = RTD_UNREACHABLE;
95616c82
OZ
1693 break;
1694 case RTN_PROHIBIT:
2feaa693 1695 ra->dest = RTD_PROHIBIT;
95616c82
OZ
1696 break;
1697 /* FIXME: What about RTN_THROW? */
1698 default:
1699 SKIP("type %d\n", i->rtm_type);
1700 return;
1701 }
1702
6b0f5f68 1703#ifdef HAVE_MPLS_KERNEL
d14f8c3c 1704 if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !ra->nh.next)
2eaf65ec 1705 ra->nh.labels = rta_get_mpls(a[RTA_NEWDST], ra->nh.label);
d14f8c3c
JMM
1706
1707 if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !ra->nh.next)
1708 {
1709 switch (rta_get_u16(a[RTA_ENCAP_TYPE]))
1710 {
1711 case LWTUNNEL_ENCAP_MPLS:
1712 {
1713 struct rtattr *enca[BIRD_RTA_MAX];
1714 nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
1715 nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
2eaf65ec 1716 ra->nh.labels = rta_get_mpls(enca[RTA_DST], ra->nh.label);
d14f8c3c
JMM
1717 break;
1718 }
1719 default:
1720 SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE]));
1721 break;
1722 }
1723 }
6b0f5f68 1724#endif
d14f8c3c 1725
6e75d0d2
OZ
1726 if (i->rtm_scope != def_scope)
1727 {
1728 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1729 ea->next = ra->eattrs;
1730 ra->eattrs = ea;
1731 ea->flags = EALF_SORTED;
1732 ea->count = 1;
1733 ea->attrs[0].id = EA_KRT_SCOPE;
1734 ea->attrs[0].flags = 0;
1735 ea->attrs[0].type = EAF_TYPE_INT;
1736 ea->attrs[0].u.data = i->rtm_scope;
1737 }
95616c82
OZ
1738
1739 if (a[RTA_PREFSRC])
1740 {
9b136840 1741 ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]);
95616c82 1742
2feaa693
OZ
1743 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1744 ea->next = ra->eattrs;
1745 ra->eattrs = ea;
95616c82
OZ
1746 ea->flags = EALF_SORTED;
1747 ea->count = 1;
1748 ea->attrs[0].id = EA_KRT_PREFSRC;
1749 ea->attrs[0].flags = 0;
1750 ea->attrs[0].type = EAF_TYPE_IP_ADDRESS;
4c553c5a
MM
1751
1752 struct adata *ad = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps));
1753 ad->length = sizeof(ps);
1754 memcpy(ad->data, &ps, sizeof(ps));
1755
1756 ea->attrs[0].u.ptr = ad;
95616c82
OZ
1757 }
1758
1759 if (a[RTA_FLOW])
1760 {
2feaa693
OZ
1761 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1762 ea->next = ra->eattrs;
1763 ra->eattrs = ea;
95616c82
OZ
1764 ea->flags = EALF_SORTED;
1765 ea->count = 1;
1766 ea->attrs[0].id = EA_KRT_REALM;
1767 ea->attrs[0].flags = 0;
1768 ea->attrs[0].type = EAF_TYPE_INT;
acb04cfd 1769 ea->attrs[0].u.data = rta_get_u32(a[RTA_FLOW]);
95616c82
OZ
1770 }
1771
9fdf9d29
OZ
1772 if (a[RTA_METRICS])
1773 {
1774 u32 metrics[KRT_METRICS_MAX];
2feaa693 1775 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr));
9fdf9d29
OZ
1776 int t, n = 0;
1777
1778 if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0)
1779 {
fe9f1a6d 1780 log(L_ERR "KRT: Received route %N with strange RTA_METRICS attribute", net->n.addr);
9fdf9d29
OZ
1781 return;
1782 }
1783
1784 for (t = 1; t < KRT_METRICS_MAX; t++)
1785 if (metrics[0] & (1 << t))
1786 {
ee7e2ffd 1787 ea->attrs[n].id = EA_CODE(PROTOCOL_KERNEL, KRT_METRICS_OFFSET + t);
9fdf9d29
OZ
1788 ea->attrs[n].flags = 0;
1789 ea->attrs[n].type = EAF_TYPE_INT; /* FIXME: Some are EAF_TYPE_BITFIELD */
1790 ea->attrs[n].u.data = metrics[t];
1791 n++;
1792 }
1793
1794 if (n > 0)
1795 {
2feaa693 1796 ea->next = ra->eattrs;
9fdf9d29
OZ
1797 ea->flags = EALF_SORTED;
1798 ea->count = n;
2feaa693 1799 ra->eattrs = ea;
9fdf9d29
OZ
1800 }
1801 }
1802
2feaa693
OZ
1803 /*
1804 * Ideally, now we would send the received route to the rest of kernel code.
98bb80a2
OZ
1805 * But IPv6 ECMP routes before 4.11 are sent as a sequence of routes, so we
1806 * postpone it and merge next hops until the end of the sequence. Note that
3e792350
OZ
1807 * when doing merging of next hops, we expect the new route to be unipath.
1808 * Otherwise, we ignore additional next hops in nexthop_insert().
2feaa693
OZ
1809 */
1810
1811 if (!s->net)
1812 {
1813 /* Store the new route */
1814 s->net = net;
1815 s->attrs = ra;
1816 s->proto = p;
1817 s->new = new;
be17805c 1818 s->krt_src = krt_src;
2feaa693
OZ
1819 s->krt_type = i->rtm_type;
1820 s->krt_proto = i->rtm_protocol;
1821 s->krt_metric = priority;
1822 }
95616c82 1823 else
2feaa693
OZ
1824 {
1825 /* Merge next hops with the stored route */
62e64905 1826 rta *oa = s->attrs;
2feaa693 1827
62e64905
OZ
1828 struct nexthop *nhs = &oa->nh;
1829 nexthop_insert(&nhs, &ra->nh);
1830
1831 /* Perhaps new nexthop is inserted at the first position */
1832 if (nhs == &ra->nh)
1833 {
1834 /* Swap rtas */
1835 s->attrs = ra;
1836
1837 /* Keep old eattrs */
1838 ra->eattrs = oa->eattrs;
1839 }
2feaa693 1840 }
95616c82
OZ
1841}
1842
1843void
1844krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
1845{
1846 struct nlmsghdr *h;
2feaa693 1847 struct nl_parse_state s;
95616c82 1848
1187627a
OZ
1849 nl_parse_begin(&s, 1);
1850 nl_request_dump(AF_UNSPEC, RTM_GETROUTE);
95616c82
OZ
1851 while (h = nl_get_scan())
1852 if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
2feaa693 1853 nl_parse_route(&s, h);
95616c82
OZ
1854 else
1855 log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
cc5b93f7 1856 nl_parse_end(&s);
95616c82
OZ
1857}
1858
1859/*
1860 * Asynchronous Netlink interface
1861 */
1862
1863static sock *nl_async_sk; /* BIRD socket for asynchronous notifications */
1864static byte *nl_async_rx_buffer; /* Receive buffer */
1865
1866static void
1867nl_async_msg(struct nlmsghdr *h)
1868{
2feaa693
OZ
1869 struct nl_parse_state s;
1870
95616c82
OZ
1871 switch (h->nlmsg_type)
1872 {
1873 case RTM_NEWROUTE:
1874 case RTM_DELROUTE:
1875 DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);
1187627a 1876 nl_parse_begin(&s, 0);
2feaa693
OZ
1877 nl_parse_route(&s, h);
1878 nl_parse_end(&s);
95616c82
OZ
1879 break;
1880 case RTM_NEWLINK:
1881 case RTM_DELLINK:
1882 DBG("KRT: Received async link notification (%d)\n", h->nlmsg_type);
1e4891e4
OZ
1883 if (kif_proto)
1884 nl_parse_link(h, 0);
95616c82
OZ
1885 break;
1886 case RTM_NEWADDR:
1887 case RTM_DELADDR:
1888 DBG("KRT: Received async address notification (%d)\n", h->nlmsg_type);
1e4891e4
OZ
1889 if (kif_proto)
1890 nl_parse_addr(h, 0);
95616c82
OZ
1891 break;
1892 default:
1893 DBG("KRT: Received unknown async notification (%d)\n", h->nlmsg_type);
1894 }
1895}
1896
1897static int
3e236955 1898nl_async_hook(sock *sk, uint size UNUSED)
95616c82
OZ
1899{
1900 struct iovec iov = { nl_async_rx_buffer, NL_RX_SIZE };
1901 struct sockaddr_nl sa;
31e9e101
ST
1902 struct msghdr m = {
1903 .msg_name = &sa,
1904 .msg_namelen = sizeof(sa),
1905 .msg_iov = &iov,
1906 .msg_iovlen = 1,
1907 };
95616c82
OZ
1908 struct nlmsghdr *h;
1909 int x;
ae80a2de 1910 uint len;
95616c82
OZ
1911
1912 x = recvmsg(sk->fd, &m, 0);
1913 if (x < 0)
1914 {
1915 if (errno == ENOBUFS)
1916 {
1917 /*
1918 * Netlink reports some packets have been thrown away.
1919 * One day we might react to it by asking for route table
1920 * scan in near future.
1921 */
2c33da50 1922 log(L_WARN "Kernel dropped some netlink messages, will resync on next scan.");
95616c82
OZ
1923 return 1; /* More data are likely to be ready */
1924 }
1925 else if (errno != EWOULDBLOCK)
1926 log(L_ERR "Netlink recvmsg: %m");
1927 return 0;
1928 }
1929 if (sa.nl_pid) /* It isn't from the kernel */
1930 {
1931 DBG("Non-kernel packet\n");
1932 return 1;
1933 }
1934 h = (void *) nl_async_rx_buffer;
1935 len = x;
1936 if (m.msg_flags & MSG_TRUNC)
1937 {
1938 log(L_WARN "Netlink got truncated asynchronous message");
1939 return 1;
1940 }
1941 while (NLMSG_OK(h, len))
1942 {
1943 nl_async_msg(h);
1944 h = NLMSG_NEXT(h, len);
1945 }
1946 if (len)
1947 log(L_WARN "nl_async_hook: Found packet remnant of size %d", len);
1948 return 1;
1949}
1950
ccd2a3ed
JMM
1951static void
1952nl_async_err_hook(sock *sk, int e UNUSED)
1953{
1954 nl_async_hook(sk, 0);
1955}
1956
95616c82
OZ
1957static void
1958nl_open_async(void)
1959{
1960 sock *sk;
1961 struct sockaddr_nl sa;
1962 int fd;
95616c82 1963
f83ce94d 1964 if (nl_async_sk)
95616c82 1965 return;
95616c82
OZ
1966
1967 DBG("KRT: Opening async netlink socket\n");
1968
1969 fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
1970 if (fd < 0)
1971 {
1972 log(L_ERR "Unable to open asynchronous rtnetlink socket: %m");
1973 return;
1974 }
1975
1976 bzero(&sa, sizeof(sa));
1977 sa.nl_family = AF_NETLINK;
29a64162
OZ
1978 sa.nl_groups = RTMGRP_LINK |
1979 RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE |
1980 RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE;
1981
95616c82
OZ
1982 if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
1983 {
1984 log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m");
f83ce94d 1985 close(fd);
95616c82
OZ
1986 return;
1987 }
1988
f83ce94d
OZ
1989 nl_async_rx_buffer = xmalloc(NL_RX_SIZE);
1990
95616c82
OZ
1991 sk = nl_async_sk = sk_new(krt_pool);
1992 sk->type = SK_MAGIC;
1993 sk->rx_hook = nl_async_hook;
ccd2a3ed 1994 sk->err_hook = nl_async_err_hook;
95616c82 1995 sk->fd = fd;
05476c4d 1996 if (sk_open(sk) < 0)
95616c82 1997 bug("Netlink: sk_open failed");
95616c82
OZ
1998}
1999
9ddbfbdd 2000
95616c82
OZ
2001/*
2002 * Interface to the UNIX krt module
2003 */
2004
95616c82 2005void
9ddbfbdd
JMM
2006krt_sys_io_init(void)
2007{
05d47bd5 2008 nl_linpool = lp_new_default(krt_pool);
9ddbfbdd
JMM
2009 HASH_INIT(nl_table_map, krt_pool, 6);
2010}
2011
2012int
c6964c30 2013krt_sys_start(struct krt_proto *p)
95616c82 2014{
29a64162 2015 struct krt_proto *old = HASH_FIND(nl_table_map, RTH, p->af, krt_table_id(p));
9ddbfbdd
JMM
2016
2017 if (old)
2018 {
2019 log(L_ERR "%s: Kernel table %u already registered by %s",
2020 p->p.name, krt_table_id(p), old->p.name);
2021 return 0;
2022 }
2023
2024 HASH_INSERT2(nl_table_map, RTH, krt_pool, p);
c6964c30
OZ
2025
2026 nl_open();
2027 nl_open_async();
9ddbfbdd
JMM
2028
2029 return 1;
95616c82
OZ
2030}
2031
2032void
9ddbfbdd 2033krt_sys_shutdown(struct krt_proto *p)
95616c82 2034{
9ddbfbdd 2035 HASH_REMOVE2(nl_table_map, RTH, krt_pool, p);
95616c82
OZ
2036}
2037
2038int
2039krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
2040{
4adcb9df 2041 return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric);
95616c82
OZ
2042}
2043
95616c82
OZ
2044void
2045krt_sys_init_config(struct krt_config *cf)
2046{
2047 cf->sys.table_id = RT_TABLE_MAIN;
bff21441 2048 cf->sys.metric = 32;
95616c82
OZ
2049}
2050
2051void
2052krt_sys_copy_config(struct krt_config *d, struct krt_config *s)
2053{
2054 d->sys.table_id = s->sys.table_id;
4adcb9df 2055 d->sys.metric = s->sys.metric;
95616c82
OZ
2056}
2057
9fdf9d29
OZ
/*
 * Printable names of kernel route metrics. krt_sys_get_attr() indexes this
 * table by the attribute id minus KRT_METRICS_OFFSET (index 0 is unused
 * there) and also passes it to ea_format_bitfield() for the "lock" attribute.
 */
2058static const char *krt_metrics_names[KRT_METRICS_MAX] = {
2059 NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss",
2060 "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack"
2061};
2062
/*
 * Printable names passed to ea_format_bitfield() for the "features"
 * attribute; positions without a name are NULL.
 */
2063static const char *krt_features_names[KRT_FEATURES_MAX] = {
2064 "ecn", NULL, NULL, "allfrag"
2065};
2066
2067int
2068krt_sys_get_attr(eattr *a, byte *buf, int buflen UNUSED)
2069{
2070 switch (a->id)
2071 {
2072 case EA_KRT_PREFSRC:
2073 bsprintf(buf, "prefsrc");
2074 return GA_NAME;
2075
2076 case EA_KRT_REALM:
2077 bsprintf(buf, "realm");
2078 return GA_NAME;
2079
6e75d0d2
OZ
2080 case EA_KRT_SCOPE:
2081 bsprintf(buf, "scope");
2082 return GA_NAME;
2083
9fdf9d29
OZ
2084 case EA_KRT_LOCK:
2085 buf += bsprintf(buf, "lock:");
2086 ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX);
2087 return GA_FULL;
2088
2089 case EA_KRT_FEATURES:
2090 buf += bsprintf(buf, "features:");
2091 ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX);
2092 return GA_FULL;
2093
2094 default:;
2095 int id = (int)EA_ID(a->id) - KRT_METRICS_OFFSET;
2096 if (id > 0 && id < KRT_METRICS_MAX)
2097 {
2098 bsprintf(buf, "%s", krt_metrics_names[id]);
2099 return GA_NAME;
2100 }
2101
2102 return GA_UNKNOWN;
2103 }
2104}
2105
95616c82
OZ
2106
2107
2108void
2109kif_sys_start(struct kif_proto *p UNUSED)
2110{
2111 nl_open();
2112 nl_open_async();
2113}
2114
2115void
2116kif_sys_shutdown(struct kif_proto *p UNUSED)
2117{
2118}
153f02da
OZ
2119
2120int
2121kif_update_sysdep_addr(struct iface *i UNUSED)
2122{
2123 return 0;
2124}