]> git.ipfire.org Git - thirdparty/bird.git/blame - sysdep/linux/netlink.c
Nest: Read Babel metric as IGP metric
[thirdparty/bird.git] / sysdep / linux / netlink.c
CommitLineData
95616c82
OZ
1/*
2 * BIRD -- Linux Netlink Interface
3 *
4 * (c) 1999--2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
a8caff32 9#include <alloca.h>
95616c82 10#include <stdio.h>
f83ce94d 11#include <unistd.h>
95616c82
OZ
12#include <fcntl.h>
13#include <sys/socket.h>
14#include <sys/uio.h>
15#include <errno.h>
16
17#undef LOCAL_DEBUG
18
19#include "nest/bird.h"
20#include "nest/route.h"
21#include "nest/protocol.h"
22#include "nest/iface.h"
4e276a89 23#include "lib/alloca.h"
7152e5ef
JMM
24#include "sysdep/unix/unix.h"
25#include "sysdep/unix/krt.h"
95616c82
OZ
26#include "lib/socket.h"
27#include "lib/string.h"
9ddbfbdd 28#include "lib/hash.h"
95616c82
OZ
29#include "conf/conf.h"
30
31#include <asm/types.h>
32#include <linux/if.h>
33#include <linux/netlink.h>
34#include <linux/rtnetlink.h>
35
6b0f5f68
MJM
36#ifdef HAVE_MPLS_KERNEL
37#include <linux/lwtunnel.h>
38#endif
9ddbfbdd 39
95616c82
OZ
40#ifndef MSG_TRUNC /* Hack: Several versions of glibc miss this one :( */
41#define MSG_TRUNC 0x20
42#endif
43
a08a81c6
OZ
44#ifndef IFA_FLAGS
45#define IFA_FLAGS 8
46#endif
47
95616c82
OZ
48#ifndef IFF_LOWER_UP
49#define IFF_LOWER_UP 0x10000
50#endif
51
9ddbfbdd
JMM
52#ifndef RTA_TABLE
53#define RTA_TABLE 15
54#endif
55
d14f8c3c
JMM
56#ifndef RTA_VIA
57#define RTA_VIA 18
58#endif
59
60#ifndef RTA_NEWDST
61#define RTA_NEWDST 19
62#endif
63
64#ifndef RTA_ENCAP_TYPE
65#define RTA_ENCAP_TYPE 21
66#endif
67
68#ifndef RTA_ENCAP
69#define RTA_ENCAP 22
70#endif
9ddbfbdd 71
8235c474 72#define krt_ipv4(p) ((p)->af == AF_INET)
cc5b93f7 73#define krt_ecmp6(p) ((p)->af == AF_INET6)
2feaa693 74
517d05df
OZ
75const int rt_default_ecmp = 16;
76
2feaa693
OZ
/*
 * Structure nl_parse_state keeps state of received route processing. Ideally,
 * we could just independently parse received Netlink messages and immediately
 * propagate received routes to the rest of BIRD, but older Linux kernels (before
 * version 4.11) represent and announce IPv6 ECMP routes not as one route with
 * multiple next hops (like RTA_MULTIPATH in IPv4 ECMP), but as a sequence of
 * routes with the same prefix. More recent kernels work as with IPv4.
 *
 * Therefore, BIRD keeps the currently processed route in the nl_parse_state
 * structure and postpones its propagation until we expect it to be final; i.e.,
 * when a non-matching route is received or when the scan ends. When another
 * matching route is received, it is merged with the already processed route to
 * form an ECMP route. Note that merging is done only for IPv6 (merge == 1), but
 * the postponing is done in both cases (for simplicity). All IPv4 routes or IPv6
 * routes with RTA_MULTIPATH set are just considered non-matching.
 *
 * This is ignored for asynchronous notifications (every notification is handled
 * as a separate route). It is not an issue for our routes, as we ignore such
 * notifications anyway. But importing alien IPv6 ECMP routes does not work
 * properly with older kernels.
 *
 * Whatever the kernel version is, IPv6 ECMP routes are sent as multiple routes
 * for the same prefix.
 */
101
102struct nl_parse_state
103{
104 struct linpool *pool;
105 int scan;
106 int merge;
107
108 net *net;
109 rta *attrs;
110 struct krt_proto *proto;
111 s8 new;
112 s8 krt_src;
113 u8 krt_type;
114 u8 krt_proto;
115 u32 krt_metric;
116};
117
95616c82
OZ
118/*
119 * Synchronous Netlink interface
120 */
121
122struct nl_sock
123{
124 int fd;
125 u32 seq;
126 byte *rx_buffer; /* Receive buffer */
127 struct nlmsghdr *last_hdr; /* Recently received packet */
ae80a2de 128 uint last_size;
95616c82
OZ
129};
130
131#define NL_RX_SIZE 8192
132
2feaa693
OZ
133#define NL_OP_DELETE 0
134#define NL_OP_ADD (NLM_F_CREATE|NLM_F_EXCL)
135#define NL_OP_REPLACE (NLM_F_CREATE|NLM_F_REPLACE)
136#define NL_OP_APPEND (NLM_F_CREATE|NLM_F_APPEND)
137
138static linpool *nl_linpool;
139
95616c82
OZ
140static struct nl_sock nl_scan = {.fd = -1}; /* Netlink socket for synchronous scan */
141static struct nl_sock nl_req = {.fd = -1}; /* Netlink socket for requests */
142
143static void
144nl_open_sock(struct nl_sock *nl)
145{
146 if (nl->fd < 0)
147 {
148 nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
149 if (nl->fd < 0)
150 die("Unable to open rtnetlink socket: %m");
574b2324 151 nl->seq = (u32) (current_time() TO_S); /* Or perhaps random_u32() ? */
95616c82
OZ
152 nl->rx_buffer = xmalloc(NL_RX_SIZE);
153 nl->last_hdr = NULL;
154 nl->last_size = 0;
155 }
156}
157
158static void
159nl_open(void)
160{
161 nl_open_sock(&nl_scan);
162 nl_open_sock(&nl_req);
163}
164
165static void
166nl_send(struct nl_sock *nl, struct nlmsghdr *nh)
167{
168 struct sockaddr_nl sa;
169
170 memset(&sa, 0, sizeof(sa));
171 sa.nl_family = AF_NETLINK;
172 nh->nlmsg_pid = 0;
173 nh->nlmsg_seq = ++(nl->seq);
53401bef 174 nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len);
95616c82
OZ
175 if (sendto(nl->fd, nh, nh->nlmsg_len, 0, (struct sockaddr *)&sa, sizeof(sa)) < 0)
176 die("rtnetlink sendto: %m");
177 nl->last_hdr = NULL;
178}
179
180static void
86c3eea0 181nl_request_dump(int af, int cmd)
95616c82
OZ
182{
183 struct {
184 struct nlmsghdr nh;
185 struct rtgenmsg g;
641172c6
OZ
186 } req = {
187 .nh.nlmsg_type = cmd,
188 .nh.nlmsg_len = sizeof(req),
189 .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
190 .g.rtgen_family = af
191 };
95616c82
OZ
192 nl_send(&nl_scan, &req.nh);
193}
194
195static struct nlmsghdr *
196nl_get_reply(struct nl_sock *nl)
197{
198 for(;;)
199 {
200 if (!nl->last_hdr)
201 {
202 struct iovec iov = { nl->rx_buffer, NL_RX_SIZE };
203 struct sockaddr_nl sa;
31e9e101
ST
204 struct msghdr m = {
205 .msg_name = &sa,
206 .msg_namelen = sizeof(sa),
207 .msg_iov = &iov,
208 .msg_iovlen = 1,
209 };
95616c82
OZ
210 int x = recvmsg(nl->fd, &m, 0);
211 if (x < 0)
212 die("nl_get_reply: %m");
213 if (sa.nl_pid) /* It isn't from the kernel */
214 {
215 DBG("Non-kernel packet\n");
216 continue;
217 }
218 nl->last_size = x;
219 nl->last_hdr = (void *) nl->rx_buffer;
220 if (m.msg_flags & MSG_TRUNC)
221 bug("nl_get_reply: got truncated reply which should be impossible");
222 }
223 if (NLMSG_OK(nl->last_hdr, nl->last_size))
224 {
225 struct nlmsghdr *h = nl->last_hdr;
226 nl->last_hdr = NLMSG_NEXT(h, nl->last_size);
227 if (h->nlmsg_seq != nl->seq)
228 {
229 log(L_WARN "nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)",
230 h->nlmsg_seq, nl->seq);
231 continue;
232 }
233 return h;
234 }
235 if (nl->last_size)
236 log(L_WARN "nl_get_reply: Found packet remnant of size %d", nl->last_size);
237 nl->last_hdr = NULL;
238 }
239}
240
1123e707 241static struct tbf rl_netlink_err = TBF_DEFAULT_LOG_LIMITS;
95616c82
OZ
242
243static int
2feaa693 244nl_error(struct nlmsghdr *h, int ignore_esrch)
95616c82
OZ
245{
246 struct nlmsgerr *e;
247 int ec;
248
249 if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr)))
250 {
251 log(L_WARN "Netlink: Truncated error message received");
252 return ENOBUFS;
253 }
254 e = (struct nlmsgerr *) NLMSG_DATA(h);
255 ec = -e->error;
2feaa693 256 if (ec && !(ignore_esrch && (ec == ESRCH)))
95616c82
OZ
257 log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec));
258 return ec;
259}
260
261static struct nlmsghdr *
262nl_get_scan(void)
263{
264 struct nlmsghdr *h = nl_get_reply(&nl_scan);
265
266 if (h->nlmsg_type == NLMSG_DONE)
267 return NULL;
268 if (h->nlmsg_type == NLMSG_ERROR)
269 {
2feaa693 270 nl_error(h, 0);
95616c82
OZ
271 return NULL;
272 }
273 return h;
274}
275
276static int
2feaa693 277nl_exchange(struct nlmsghdr *pkt, int ignore_esrch)
95616c82
OZ
278{
279 struct nlmsghdr *h;
280
281 nl_send(&nl_req, pkt);
282 for(;;)
283 {
284 h = nl_get_reply(&nl_req);
285 if (h->nlmsg_type == NLMSG_ERROR)
286 break;
287 log(L_WARN "nl_exchange: Unexpected reply received");
288 }
2feaa693 289 return nl_error(h, ignore_esrch) ? -1 : 0;
95616c82
OZ
290}
291
292/*
293 * Netlink attributes
294 */
295
296static int nl_attr_len;
297
298static void *
299nl_checkin(struct nlmsghdr *h, int lsize)
300{
301 nl_attr_len = h->nlmsg_len - NLMSG_LENGTH(lsize);
302 if (nl_attr_len < 0)
303 {
304 log(L_ERR "nl_checkin: underrun by %d bytes", -nl_attr_len);
305 return NULL;
306 }
307 return NLMSG_DATA(h);
308}
309
ad276157
JMM
310struct nl_want_attrs {
311 u8 defined:1;
312 u8 checksize:1;
313 u8 size;
314};
315
316
317#define BIRD_IFLA_MAX (IFLA_WIRELESS+1)
318
319static struct nl_want_attrs ifla_attr_want[BIRD_IFLA_MAX] = {
320 [IFLA_IFNAME] = { 1, 0, 0 },
321 [IFLA_MTU] = { 1, 1, sizeof(u32) },
943478b0 322 [IFLA_MASTER] = { 1, 1, sizeof(u32) },
ad276157
JMM
323 [IFLA_WIRELESS] = { 1, 0, 0 },
324};
325
29a64162 326
e37d2e3e 327#define BIRD_IFA_MAX (IFA_FLAGS+1)
ad276157 328
ad276157
JMM
329static struct nl_want_attrs ifa_attr_want4[BIRD_IFA_MAX] = {
330 [IFA_ADDRESS] = { 1, 1, sizeof(ip4_addr) },
331 [IFA_LOCAL] = { 1, 1, sizeof(ip4_addr) },
332 [IFA_BROADCAST] = { 1, 1, sizeof(ip4_addr) },
62e64905 333 [IFA_FLAGS] = { 1, 1, sizeof(u32) },
ad276157 334};
29a64162 335
ad276157
JMM
336static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = {
337 [IFA_ADDRESS] = { 1, 1, sizeof(ip6_addr) },
338 [IFA_LOCAL] = { 1, 1, sizeof(ip6_addr) },
e37d2e3e 339 [IFA_FLAGS] = { 1, 1, sizeof(u32) },
ad276157 340};
29a64162 341
ad276157 342
d14f8c3c 343#define BIRD_RTA_MAX (RTA_ENCAP+1)
ad276157 344
4e276a89 345static struct nl_want_attrs nexthop_attr_want4[BIRD_RTA_MAX] = {
ad276157 346 [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
53401bef 347 [RTA_VIA] = { 1, 0, 0 },
d14f8c3c
JMM
348 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
349 [RTA_ENCAP] = { 1, 0, 0 },
350};
351
4ff15a75 352static struct nl_want_attrs nexthop_attr_want6[BIRD_RTA_MAX] = {
98bb80a2 353 [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) },
53401bef 354 [RTA_VIA] = { 1, 0, 0 },
4ff15a75
OZ
355 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
356 [RTA_ENCAP] = { 1, 0, 0 },
357};
358
6b0f5f68 359#ifdef HAVE_MPLS_KERNEL
f1b5f179
KY
360static struct nl_want_attrs nexthop_attr_want_mpls[BIRD_RTA_MAX] = {
361 [RTA_VIA] = { 1, 0, 0 },
362 [RTA_NEWDST] = { 1, 0, 0 },
363};
364
d14f8c3c
JMM
365static struct nl_want_attrs encap_mpls_want[BIRD_RTA_MAX] = {
366 [RTA_DST] = { 1, 0, 0 },
ad276157 367};
6b0f5f68 368#endif
ad276157 369
ad276157
JMM
370static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = {
371 [RTA_DST] = { 1, 1, sizeof(ip4_addr) },
372 [RTA_OIF] = { 1, 1, sizeof(u32) },
373 [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
374 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
375 [RTA_PREFSRC] = { 1, 1, sizeof(ip4_addr) },
376 [RTA_METRICS] = { 1, 0, 0 },
377 [RTA_MULTIPATH] = { 1, 0, 0 },
378 [RTA_FLOW] = { 1, 1, sizeof(u32) },
379 [RTA_TABLE] = { 1, 1, sizeof(u32) },
53401bef 380 [RTA_VIA] = { 1, 0, 0 },
d14f8c3c
JMM
381 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
382 [RTA_ENCAP] = { 1, 0, 0 },
ad276157 383};
29a64162 384
ad276157
JMM
385static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = {
386 [RTA_DST] = { 1, 1, sizeof(ip6_addr) },
be17805c 387 [RTA_SRC] = { 1, 1, sizeof(ip6_addr) },
ad276157
JMM
388 [RTA_IIF] = { 1, 1, sizeof(u32) },
389 [RTA_OIF] = { 1, 1, sizeof(u32) },
390 [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) },
391 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
392 [RTA_PREFSRC] = { 1, 1, sizeof(ip6_addr) },
393 [RTA_METRICS] = { 1, 0, 0 },
98bb80a2 394 [RTA_MULTIPATH] = { 1, 0, 0 },
ad276157
JMM
395 [RTA_FLOW] = { 1, 1, sizeof(u32) },
396 [RTA_TABLE] = { 1, 1, sizeof(u32) },
53401bef 397 [RTA_VIA] = { 1, 0, 0 },
d14f8c3c
JMM
398 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
399 [RTA_ENCAP] = { 1, 0, 0 },
400};
401
6b0f5f68 402#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
403static struct nl_want_attrs rtm_attr_want_mpls[BIRD_RTA_MAX] = {
404 [RTA_DST] = { 1, 1, sizeof(u32) },
405 [RTA_IIF] = { 1, 1, sizeof(u32) },
406 [RTA_OIF] = { 1, 1, sizeof(u32) },
407 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
408 [RTA_METRICS] = { 1, 0, 0 },
f1b5f179 409 [RTA_MULTIPATH] = { 1, 0, 0 },
d14f8c3c
JMM
410 [RTA_FLOW] = { 1, 1, sizeof(u32) },
411 [RTA_TABLE] = { 1, 1, sizeof(u32) },
412 [RTA_VIA] = { 1, 0, 0 },
413 [RTA_NEWDST] = { 1, 0, 0 },
ad276157 414};
6b0f5f68 415#endif
ad276157
JMM
416
417
95616c82 418static int
ad276157 419nl_parse_attrs(struct rtattr *a, struct nl_want_attrs *want, struct rtattr **k, int ksize)
95616c82
OZ
420{
421 int max = ksize / sizeof(struct rtattr *);
422 bzero(k, ksize);
ad276157
JMM
423
424 for ( ; RTA_OK(a, nl_attr_len); a = RTA_NEXT(a, nl_attr_len))
95616c82 425 {
ad276157
JMM
426 if ((a->rta_type >= max) || !want[a->rta_type].defined)
427 continue;
428
429 if (want[a->rta_type].checksize && (RTA_PAYLOAD(a) != want[a->rta_type].size))
430 {
9b136840 431 log(L_ERR "nl_parse_attrs: Malformed attribute received");
ad276157
JMM
432 return 0;
433 }
434
435 k[a->rta_type] = a;
95616c82 436 }
ad276157 437
95616c82
OZ
438 if (nl_attr_len)
439 {
440 log(L_ERR "nl_parse_attrs: remnant of size %d", nl_attr_len);
441 return 0;
442 }
ad276157
JMM
443
444 return 1;
95616c82
OZ
445}
446
d14f8c3c
JMM
447static inline u16 rta_get_u16(struct rtattr *a)
448{ return *(u16 *) RTA_DATA(a); }
449
fce764f9 450static inline u32 rta_get_u32(struct rtattr *a)
acb04cfd
OZ
451{ return *(u32 *) RTA_DATA(a); }
452
453static inline ip4_addr rta_get_ip4(struct rtattr *a)
454{ return ip4_ntoh(*(ip4_addr *) RTA_DATA(a)); }
455
456static inline ip6_addr rta_get_ip6(struct rtattr *a)
457{ return ip6_ntoh(*(ip6_addr *) RTA_DATA(a)); }
458
9b136840
JMM
459static inline ip_addr rta_get_ipa(struct rtattr *a)
460{
461 if (RTA_PAYLOAD(a) == sizeof(ip4_addr))
462 return ipa_from_ip4(rta_get_ip4(a));
463 else
464 return ipa_from_ip6(rta_get_ip6(a));
465}
acb04cfd 466
6b0f5f68 467#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
468static inline ip_addr rta_get_via(struct rtattr *a)
469{
470 struct rtvia *v = RTA_DATA(a);
471 switch(v->rtvia_family) {
472 case AF_INET: return ipa_from_ip4(ip4_ntoh(*(ip4_addr *) v->rtvia_addr));
473 case AF_INET6: return ipa_from_ip6(ip6_ntoh(*(ip6_addr *) v->rtvia_addr));
474 }
475 return IPA_NONE;
476}
477
478static u32 rta_mpls_stack[MPLS_MAX_LABEL_STACK];
479static inline int rta_get_mpls(struct rtattr *a, u32 *stack)
480{
2eaf65ec
OZ
481 if (!a)
482 return 0;
483
d14f8c3c
JMM
484 if (RTA_PAYLOAD(a) % 4)
485 log(L_WARN "KRT: Strange length of received MPLS stack: %u", RTA_PAYLOAD(a));
486
2eaf65ec
OZ
487 int labels = mpls_get(RTA_DATA(a), RTA_PAYLOAD(a) & ~0x3, stack);
488
489 if (labels < 0)
490 {
491 log(L_WARN "KRT: Too long MPLS stack received, ignoring");
492 labels = 0;
493 }
494
495 return labels;
d14f8c3c 496}
6b0f5f68 497#endif
d14f8c3c 498
9fdf9d29
OZ
499struct rtattr *
500nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint dlen)
95616c82 501{
9fdf9d29
OZ
502 uint pos = NLMSG_ALIGN(h->nlmsg_len);
503 uint len = RTA_LENGTH(dlen);
95616c82
OZ
504
505 if (pos + len > bufsize)
506 bug("nl_add_attr: packet buffer overflow");
9fdf9d29
OZ
507
508 struct rtattr *a = (struct rtattr *)((char *)h + pos);
95616c82
OZ
509 a->rta_type = code;
510 a->rta_len = len;
511 h->nlmsg_len = pos + len;
9fdf9d29
OZ
512
513 if (dlen > 0)
514 memcpy(RTA_DATA(a), data, dlen);
515
516 return a;
95616c82
OZ
517}
518
d14f8c3c
JMM
519static inline struct rtattr *
520nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code)
521{
522 return nl_add_attr(h, bufsize, code, NULL, 0);
523}
524
525static inline void
526nl_close_attr(struct nlmsghdr *h, struct rtattr *a)
527{
528 a->rta_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)a;
529}
530
531static inline void
532nl_add_attr_u16(struct nlmsghdr *h, uint bufsize, int code, u16 data)
533{
534 nl_add_attr(h, bufsize, code, &data, 2);
535}
536
95616c82 537static inline void
29a64162 538nl_add_attr_u32(struct nlmsghdr *h, uint bufsize, int code, u32 data)
95616c82
OZ
539{
540 nl_add_attr(h, bufsize, code, &data, 4);
541}
542
543static inline void
29a64162 544nl_add_attr_ip4(struct nlmsghdr *h, uint bufsize, int code, ip4_addr ip4)
95616c82 545{
29a64162
OZ
546 ip4 = ip4_hton(ip4);
547 nl_add_attr(h, bufsize, code, &ip4, sizeof(ip4));
548}
549
550static inline void
551nl_add_attr_ip6(struct nlmsghdr *h, uint bufsize, int code, ip6_addr ip6)
552{
553 ip6 = ip6_hton(ip6);
554 nl_add_attr(h, bufsize, code, &ip6, sizeof(ip6));
555}
556
557static inline void
558nl_add_attr_ipa(struct nlmsghdr *h, uint bufsize, int code, ip_addr ipa)
559{
560 if (ipa_is_ip4(ipa))
561 nl_add_attr_ip4(h, bufsize, code, ipa_to_ip4(ipa));
9b136840 562 else
29a64162 563 nl_add_attr_ip6(h, bufsize, code, ipa_to_ip6(ipa));
95616c82
OZ
564}
565
6b0f5f68 566#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
567static inline void
568nl_add_attr_mpls(struct nlmsghdr *h, uint bufsize, int code, int len, u32 *stack)
9fdf9d29 569{
d14f8c3c
JMM
570 char buf[len*4];
571 mpls_put(buf, len, stack);
572 nl_add_attr(h, bufsize, code, buf, len*4);
9fdf9d29 573}
95616c82
OZ
574
575static inline void
d14f8c3c 576nl_add_attr_mpls_encap(struct nlmsghdr *h, uint bufsize, int len, u32 *stack)
95616c82 577{
d14f8c3c
JMM
578 nl_add_attr_u16(h, bufsize, RTA_ENCAP_TYPE, LWTUNNEL_ENCAP_MPLS);
579
580 struct rtattr *nest = nl_open_attr(h, bufsize, RTA_ENCAP);
581 nl_add_attr_mpls(h, bufsize, RTA_DST, len, stack);
582 nl_close_attr(h, nest);
583}
584
585static inline void
586nl_add_attr_via(struct nlmsghdr *h, uint bufsize, ip_addr ipa)
587{
66acbc8d 588 struct rtvia *via = alloca(sizeof(struct rtvia) + 16);
d14f8c3c 589
62e64905
OZ
590 if (ipa_is_ip4(ipa))
591 {
d14f8c3c 592 via->rtvia_family = AF_INET;
62e64905 593 put_ip4(via->rtvia_addr, ipa_to_ip4(ipa));
66acbc8d 594 nl_add_attr(h, bufsize, RTA_VIA, via, sizeof(struct rtvia) + 4);
62e64905
OZ
595 }
596 else
597 {
d14f8c3c 598 via->rtvia_family = AF_INET6;
62e64905 599 put_ip6(via->rtvia_addr, ipa_to_ip6(ipa));
66acbc8d 600 nl_add_attr(h, bufsize, RTA_VIA, via, sizeof(struct rtvia) + 16);
d14f8c3c 601 }
95616c82 602}
6b0f5f68 603#endif
95616c82 604
9fdf9d29
OZ
605static inline struct rtnexthop *
606nl_open_nexthop(struct nlmsghdr *h, uint bufsize)
607{
608 uint pos = NLMSG_ALIGN(h->nlmsg_len);
609 uint len = RTNH_LENGTH(0);
610
611 if (pos + len > bufsize)
612 bug("nl_open_nexthop: packet buffer overflow");
613
614 h->nlmsg_len = pos + len;
615
616 return (void *)h + pos;
617}
618
619static inline void
620nl_close_nexthop(struct nlmsghdr *h, struct rtnexthop *nh)
621{
622 nh->rtnh_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)nh;
623}
95616c82 624
d14f8c3c 625static inline void
6b0f5f68 626nl_add_nexthop(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af UNUSED)
d14f8c3c 627{
6b0f5f68 628#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
629 if (nh->labels > 0)
630 if (af == AF_MPLS)
631 nl_add_attr_mpls(h, bufsize, RTA_NEWDST, nh->labels, nh->label);
632 else
633 nl_add_attr_mpls_encap(h, bufsize, nh->labels, nh->label);
634
635 if (ipa_nonzero(nh->gw))
53401bef
OZ
636 {
637 if (af == (ipa_is_ip4(nh->gw) ? AF_INET : AF_INET6))
d14f8c3c 638 nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
53401bef
OZ
639 else
640 nl_add_attr_via(h, bufsize, nh->gw);
641 }
6b0f5f68
MJM
642#else
643
644 if (ipa_nonzero(nh->gw))
645 nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
646#endif
d14f8c3c
JMM
647}
648
95616c82 649static void
d14f8c3c 650nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af)
95616c82 651{
9fdf9d29
OZ
652 struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH);
653
95616c82 654 for (; nh; nh = nh->next)
9fdf9d29
OZ
655 {
656 struct rtnexthop *rtnh = nl_open_nexthop(h, bufsize);
95616c82 657
9fdf9d29
OZ
658 rtnh->rtnh_flags = 0;
659 rtnh->rtnh_hops = nh->weight;
660 rtnh->rtnh_ifindex = nh->iface->index;
95616c82 661
d14f8c3c 662 nl_add_nexthop(h, bufsize, nh, af);
95616c82 663
a1f5e514
OZ
664 if (nh->flags & RNF_ONLINK)
665 rtnh->rtnh_flags |= RTNH_F_ONLINK;
666
9fdf9d29
OZ
667 nl_close_nexthop(h, rtnh);
668 }
669
670 nl_close_attr(h, a);
671}
95616c82 672
4e276a89 673static struct nexthop *
3e792350 674nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr *ra, int af)
95616c82 675{
ad276157 676 struct rtattr *a[BIRD_RTA_MAX];
95616c82 677 struct rtnexthop *nh = RTA_DATA(ra);
4e276a89 678 struct nexthop *rv, *first, **last;
3e236955 679 unsigned len = RTA_PAYLOAD(ra);
95616c82
OZ
680
681 first = NULL;
682 last = &first;
95616c82
OZ
683
684 while (len)
685 {
686 /* Use RTNH_OK(nh,len) ?? */
687 if ((len < sizeof(*nh)) || (len < nh->rtnh_len))
688 return NULL;
689
3e792350 690 *last = rv = lp_allocz(s->pool, NEXTHOP_MAX_SIZE);
95616c82
OZ
691 last = &(rv->next);
692
693 rv->weight = nh->rtnh_hops;
694 rv->iface = if_find_by_index(nh->rtnh_ifindex);
695 if (!rv->iface)
696 return NULL;
697
698 /* Nonexistent RTNH_PAYLOAD ?? */
699 nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0);
98bb80a2
OZ
700 switch (af)
701 {
98bb80a2 702 case AF_INET:
4ff15a75 703 if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want4, a, sizeof(a)))
98bb80a2
OZ
704 return NULL;
705 break;
4ff15a75 706
98bb80a2 707 case AF_INET6:
4ff15a75 708 if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want6, a, sizeof(a)))
98bb80a2
OZ
709 return NULL;
710 break;
4ff15a75 711
f1b5f179
KY
712#ifdef HAVE_MPLS_KERNEL
713 case AF_MPLS:
714 if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want_mpls, a, sizeof(a)))
715 return NULL;
716
717 if (a[RTA_NEWDST])
718 rv->labels = rta_get_mpls(a[RTA_NEWDST], rv->label);
719
720 break;
721#endif
722
98bb80a2
OZ
723 default:
724 return NULL;
725 }
726
95616c82 727 if (a[RTA_GATEWAY])
53401bef 728 rv->gw = rta_get_ipa(a[RTA_GATEWAY]);
95616c82 729
53401bef
OZ
730#ifdef HAVE_MPLS_KERNEL
731 if (a[RTA_VIA])
732 rv->gw = rta_get_via(a[RTA_VIA]);
733#endif
734
735 if (ipa_nonzero(rv->gw))
736 {
a1f5e514
OZ
737 if (nh->rtnh_flags & RTNH_F_ONLINK)
738 rv->flags |= RNF_ONLINK;
739
23c212e7 740 neighbor *nbr;
586c1800
OZ
741 nbr = neigh_find(&p->p, rv->gw, rv->iface,
742 (rv->flags & RNF_ONLINK) ? NEF_ONLINK : 0);
23c212e7 743 if (!nbr || (nbr->scope == SCOPE_HOST))
95616c82
OZ
744 return NULL;
745 }
62e64905 746
6b0f5f68 747#ifdef HAVE_MPLS_KERNEL
2eaf65ec 748 if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE])
6b0f5f68
MJM
749 {
750 if (rta_get_u16(a[RTA_ENCAP_TYPE]) != LWTUNNEL_ENCAP_MPLS) {
751 log(L_WARN "KRT: Unknown encapsulation method %d in multipath", rta_get_u16(a[RTA_ENCAP_TYPE]));
752 return NULL;
d14f8c3c
JMM
753 }
754
6b0f5f68
MJM
755 struct rtattr *enca[BIRD_RTA_MAX];
756 nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
757 nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
758 rv->labels = rta_get_mpls(enca[RTA_DST], rv->label);
6b0f5f68
MJM
759 }
760#endif
761
95616c82
OZ
762
763 len -= NLMSG_ALIGN(nh->rtnh_len);
764 nh = RTNH_NEXT(nh);
765 }
766
59d3a361
OZ
767 /* Ensure nexthops are sorted to satisfy nest invariant */
768 if (!nexthop_is_sorted(first))
769 first = nexthop_sort(first);
770
95616c82
OZ
771 return first;
772}
773
9fdf9d29
OZ
774static void
775nl_add_metrics(struct nlmsghdr *h, uint bufsize, u32 *metrics, int max)
776{
777 struct rtattr *a = nl_open_attr(h, bufsize, RTA_METRICS);
778 int t;
779
780 for (t = 1; t < max; t++)
781 if (metrics[0] & (1 << t))
782 nl_add_attr_u32(h, bufsize, t, metrics[t]);
783
784 nl_close_attr(h, a);
785}
786
787static int
788nl_parse_metrics(struct rtattr *hdr, u32 *metrics, int max)
789{
790 struct rtattr *a = RTA_DATA(hdr);
791 int len = RTA_PAYLOAD(hdr);
792
793 metrics[0] = 0;
794 for (; RTA_OK(a, len); a = RTA_NEXT(a, len))
795 {
796 if (a->rta_type == RTA_UNSPEC)
797 continue;
798
799 if (a->rta_type >= max)
800 continue;
801
802 if (RTA_PAYLOAD(a) != 4)
803 return -1;
804
805 metrics[0] |= 1 << a->rta_type;
acb04cfd 806 metrics[a->rta_type] = rta_get_u32(a);
9fdf9d29
OZ
807 }
808
809 if (len > 0)
810 return -1;
811
812 return 0;
813}
814
95616c82
OZ
815
816/*
817 * Scanning of interfaces
818 */
819
820static void
821nl_parse_link(struct nlmsghdr *h, int scan)
822{
823 struct ifinfomsg *i;
ad276157 824 struct rtattr *a[BIRD_IFLA_MAX];
95616c82
OZ
825 int new = h->nlmsg_type == RTM_NEWLINK;
826 struct iface f = {};
827 struct iface *ifi;
828 char *name;
943478b0 829 u32 mtu, master = 0;
ae80a2de 830 uint fl;
95616c82 831
ad276157 832 if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFLA_RTA(i), ifla_attr_want, a, sizeof(a)))
95616c82 833 return;
ad276157 834 if (!a[IFLA_IFNAME] || (RTA_PAYLOAD(a[IFLA_IFNAME]) < 2) || !a[IFLA_MTU])
95616c82 835 {
ad276157
JMM
836 /*
837 * IFLA_IFNAME and IFLA_MTU are required, in fact, but there may also come
838 * a message with IFLA_WIRELESS set, where (e.g.) no IFLA_IFNAME exists.
839 * We simply ignore all such messages with IFLA_WIRELESS without notice.
840 */
841
842 if (a[IFLA_WIRELESS])
843 return;
844
845 log(L_ERR "KIF: Malformed message received");
95616c82
OZ
846 return;
847 }
ad276157 848
95616c82 849 name = RTA_DATA(a[IFLA_IFNAME]);
acb04cfd 850 mtu = rta_get_u32(a[IFLA_MTU]);
95616c82 851
943478b0
OZ
852 if (a[IFLA_MASTER])
853 master = rta_get_u32(a[IFLA_MASTER]);
854
95616c82
OZ
855 ifi = if_find_by_index(i->ifi_index);
856 if (!new)
857 {
858 DBG("KIF: IF%d(%s) goes down\n", i->ifi_index, name);
859 if (!ifi)
860 return;
861
862 if_delete(ifi);
863 }
864 else
865 {
866 DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i->ifi_index, name, mtu, i->ifi_flags);
867 if (ifi && strncmp(ifi->name, name, sizeof(ifi->name)-1))
868 if_delete(ifi);
869
870 strncpy(f.name, name, sizeof(f.name)-1);
871 f.index = i->ifi_index;
872 f.mtu = mtu;
873
943478b0
OZ
874 f.master_index = master;
875 f.master = if_find_by_index(master);
876
95616c82
OZ
877 fl = i->ifi_flags;
878 if (fl & IFF_UP)
879 f.flags |= IF_ADMIN_UP;
880 if (fl & IFF_LOWER_UP)
881 f.flags |= IF_LINK_UP;
882 if (fl & IFF_LOOPBACK) /* Loopback */
883 f.flags |= IF_MULTIACCESS | IF_LOOPBACK | IF_IGNORE;
884 else if (fl & IFF_POINTOPOINT) /* PtP */
885 f.flags |= IF_MULTICAST;
886 else if (fl & IFF_BROADCAST) /* Broadcast */
887 f.flags |= IF_MULTIACCESS | IF_BROADCAST | IF_MULTICAST;
888 else
889 f.flags |= IF_MULTIACCESS; /* NBMA */
3216eb03 890
16a3254c
OZ
891 if (fl & IFF_MULTICAST)
892 f.flags |= IF_MULTICAST;
893
3216eb03
OZ
894 ifi = if_update(&f);
895
896 if (!scan)
897 if_end_partial_update(ifi);
95616c82
OZ
898 }
899}
900
901static void
9b136840 902nl_parse_addr4(struct ifaddrmsg *i, int scan, int new)
95616c82 903{
ad276157 904 struct rtattr *a[BIRD_IFA_MAX];
95616c82 905 struct iface *ifi;
e37d2e3e 906 u32 ifa_flags;
95616c82
OZ
907 int scope;
908
9b136840 909 if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a)))
95616c82 910 return;
ad276157 911
9b136840 912 if (!a[IFA_LOCAL])
ad276157 913 {
9b136840
JMM
914 log(L_ERR "KIF: Malformed message received (missing IFA_LOCAL)");
915 return;
ad276157 916 }
ad276157 917 if (!a[IFA_ADDRESS])
95616c82 918 {
ad276157 919 log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
95616c82
OZ
920 return;
921 }
922
923 ifi = if_find_by_index(i->ifa_index);
924 if (!ifi)
925 {
926 log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
927 return;
928 }
929
e37d2e3e
OZ
930 if (a[IFA_FLAGS])
931 ifa_flags = rta_get_u32(a[IFA_FLAGS]);
932 else
933 ifa_flags = i->ifa_flags;
934
9b136840 935 struct ifa ifa;
95616c82
OZ
936 bzero(&ifa, sizeof(ifa));
937 ifa.iface = ifi;
cc5b93f7 938 if (ifa_flags & IFA_F_SECONDARY)
95616c82
OZ
939 ifa.flags |= IA_SECONDARY;
940
9b136840
JMM
941 ifa.ip = rta_get_ipa(a[IFA_LOCAL]);
942
d7661fbe 943 if (i->ifa_prefixlen > IP4_MAX_PREFIX_LENGTH)
95616c82
OZ
944 {
945 log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
946 new = 0;
947 }
d7661fbe 948 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH)
95616c82 949 {
9b136840
JMM
950 ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
951 net_fill_ip4(&ifa.prefix, rta_get_ip4(a[IFA_ADDRESS]), i->ifa_prefixlen);
95616c82
OZ
952
953 /* It is either a host address or a peer address */
9b136840 954 if (ipa_equal(ifa.ip, ifa.brd))
95616c82
OZ
955 ifa.flags |= IA_HOST;
956 else
957 {
958 ifa.flags |= IA_PEER;
9b136840 959 ifa.opposite = ifa.brd;
95616c82
OZ
960 }
961 }
962 else
963 {
9b136840
JMM
964 net_fill_ip4(&ifa.prefix, ipa_to_ip4(ifa.ip), i->ifa_prefixlen);
965 net_normalize(&ifa.prefix);
966
d7661fbe 967 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 1)
95616c82
OZ
968 ifa.opposite = ipa_opposite_m1(ifa.ip);
969
d7661fbe 970 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 2)
95616c82
OZ
971 ifa.opposite = ipa_opposite_m2(ifa.ip);
972
e2630a49
OZ
973 if (ifi->flags & IF_BROADCAST)
974 {
975 /* If kernel offers us a broadcast address, we trust it */
976 if (a[IFA_BROADCAST])
977 ifa.brd = ipa_from_ip4(rta_get_ip4(a[IFA_BROADCAST]));
978 /* Otherwise we create one (except for /31) */
979 else if (i->ifa_prefixlen < (IP4_MAX_PREFIX_LENGTH - 1))
980 ifa.brd = ipa_from_ip4(ip4_or(ipa_to_ip4(ifa.ip),
981 ip4_not(ip4_mkmask(i->ifa_prefixlen))));
9b136840
JMM
982 }
983 }
984
985 scope = ipa_classify(ifa.ip);
986 if (scope < 0)
987 {
988 log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
989 return;
990 }
991 ifa.scope = scope & IADDR_SCOPE_MASK;
992
993 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
994 ifi->index, ifi->name,
995 new ? "added" : "removed",
4659b2ae 996 ifa.ip, ifa.flags, &ifa.prefix, ifa.brd, ifa.opposite);
9b136840
JMM
997
998 if (new)
999 ifa_update(&ifa);
1000 else
1001 ifa_delete(&ifa);
1002
1003 if (!scan)
1004 if_end_partial_update(ifi);
1005}
1006
1007static void
1008nl_parse_addr6(struct ifaddrmsg *i, int scan, int new)
1009{
1010 struct rtattr *a[BIRD_IFA_MAX];
1011 struct iface *ifi;
cc5b93f7 1012 u32 ifa_flags;
9b136840
JMM
1013 int scope;
1014
1015 if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a)))
1016 return;
1017
1018 if (!a[IFA_ADDRESS])
1019 {
1020 log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
1021 return;
1022 }
1023
1024 ifi = if_find_by_index(i->ifa_index);
1025 if (!ifi)
1026 {
1027 log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
1028 return;
1029 }
1030
cc5b93f7
OZ
1031 if (a[IFA_FLAGS])
1032 ifa_flags = rta_get_u32(a[IFA_FLAGS]);
1033 else
1034 ifa_flags = i->ifa_flags;
1035
9b136840
JMM
1036 struct ifa ifa;
1037 bzero(&ifa, sizeof(ifa));
1038 ifa.iface = ifi;
e37d2e3e 1039 if (ifa_flags & IFA_F_SECONDARY)
9b136840
JMM
1040 ifa.flags |= IA_SECONDARY;
1041
e37d2e3e
OZ
1042 /* Ignore tentative addresses silently */
1043 if (ifa_flags & IFA_F_TENTATIVE)
1044 return;
9b136840 1045
95616c82 1046 /* IFA_LOCAL can be unset for IPv6 interfaces */
9b136840
JMM
1047 ifa.ip = rta_get_ipa(a[IFA_LOCAL] ? : a[IFA_ADDRESS]);
1048
d7661fbe 1049 if (i->ifa_prefixlen > IP6_MAX_PREFIX_LENGTH)
9b136840
JMM
1050 {
1051 log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
1052 new = 0;
1053 }
d7661fbe 1054 if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH)
9b136840
JMM
1055 {
1056 ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
1057 net_fill_ip6(&ifa.prefix, rta_get_ip6(a[IFA_ADDRESS]), i->ifa_prefixlen);
1058
1059 /* It is either a host address or a peer address */
1060 if (ipa_equal(ifa.ip, ifa.brd))
1061 ifa.flags |= IA_HOST;
1062 else
1063 {
1064 ifa.flags |= IA_PEER;
1065 ifa.opposite = ifa.brd;
95616c82 1066 }
9b136840
JMM
1067 }
1068 else
1069 {
1070 net_fill_ip6(&ifa.prefix, ipa_to_ip6(ifa.ip), i->ifa_prefixlen);
1071 net_normalize(&ifa.prefix);
1072
d7661fbe 1073 if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH - 1)
9b136840 1074 ifa.opposite = ipa_opposite_m1(ifa.ip);
95616c82
OZ
1075 }
1076
1077 scope = ipa_classify(ifa.ip);
1078 if (scope < 0)
1079 {
1080 log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
1081 return;
1082 }
1083 ifa.scope = scope & IADDR_SCOPE_MASK;
1084
9b136840 1085 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
95616c82
OZ
1086 ifi->index, ifi->name,
1087 new ? "added" : "removed",
4659b2ae 1088 ifa.ip, ifa.flags, &ifa.prefix, ifa.brd, ifa.opposite);
3216eb03 1089
95616c82
OZ
1090 if (new)
1091 ifa_update(&ifa);
1092 else
1093 ifa_delete(&ifa);
3216eb03
OZ
1094
1095 if (!scan)
1096 if_end_partial_update(ifi);
95616c82
OZ
1097}
1098
9b136840
JMM
/*
 * Hand a received RTM_NEWADDR / RTM_DELADDR message over to the parser of
 * its address family. Messages rejected by nl_checkin() and messages of any
 * family other than AF_INET / AF_INET6 are dropped here.
 *
 * @h: header of the received Netlink address message
 * @scan: passed through unchanged to the per-family parser
 */
static void
nl_parse_addr(struct nlmsghdr *h, int scan)
{
  struct ifaddrmsg *msg = nl_checkin(h, sizeof(*msg));
  if (!msg)
    return;

  /* Every type other than RTM_NEWADDR is passed on as "not new" */
  int is_new = (h->nlmsg_type == RTM_NEWADDR);

  if (msg->ifa_family == AF_INET)
    nl_parse_addr4(msg, scan, is_new);
  else if (msg->ifa_family == AF_INET6)
    nl_parse_addr6(msg, scan, is_new);
}
1118
95616c82
OZ
1119void
1120kif_do_scan(struct kif_proto *p UNUSED)
1121{
1122 struct nlmsghdr *h;
1123
1124 if_start_update();
1125
86c3eea0 1126 nl_request_dump(AF_UNSPEC, RTM_GETLINK);
95616c82
OZ
1127 while (h = nl_get_scan())
1128 if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)
1129 nl_parse_link(h, 1);
1130 else
1131 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
29a64162 1132
943478b0
OZ
1133 /* Re-resolve master interface for slaves */
1134 struct iface *i;
1135 WALK_LIST(i, iface_list)
1136 if (i->master_index)
1137 {
1138 struct iface f = {
1139 .flags = i->flags,
1140 .mtu = i->mtu,
1141 .index = i->index,
1142 .master_index = i->master_index,
1143 .master = if_find_by_index(i->master_index)
1144 };
1145
1146 if (f.master != i->master)
1147 {
1148 memcpy(f.name, i->name, sizeof(f.name));
1149 if_update(&f);
1150 }
1151 }
1152
d7661fbe 1153 nl_request_dump(AF_INET, RTM_GETADDR);
95616c82
OZ
1154 while (h = nl_get_scan())
1155 if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
3216eb03 1156 nl_parse_addr(h, 1);
95616c82
OZ
1157 else
1158 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
29a64162 1159
d7661fbe
JMM
1160 nl_request_dump(AF_INET6, RTM_GETADDR);
1161 while (h = nl_get_scan())
1162 if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
1163 nl_parse_addr(h, 1);
1164 else
1165 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
29a64162 1166
95616c82
OZ
1167 if_end_update();
1168}
1169
1170/*
1171 * Routes
1172 */
1173
9ddbfbdd
JMM
1174static inline u32
1175krt_table_id(struct krt_proto *p)
1176{
1177 return KRT_CF->sys.table_id;
1178}
1179
1180static HASH(struct krt_proto) nl_table_map;
1181
29a64162
OZ
1182#define RTH_KEY(p) p->af, krt_table_id(p)
1183#define RTH_NEXT(p) p->sys.hash_next
1184#define RTH_EQ(a1,i1,a2,i2) a1 == a2 && i1 == i2
1185#define RTH_FN(a,i) a ^ u32_hash(i)
9ddbfbdd
JMM
1186
1187#define RTH_REHASH rth_rehash
1188#define RTH_PARAMS /8, *2, 2, 2, 6, 20
1189
1190HASH_DEFINE_REHASH_FN(RTH, struct krt_proto)
95616c82
OZ
1191
1192int
1193krt_capable(rte *e)
1194{
1195 rta *a = e->attrs;
1196
95616c82 1197 switch (a->dest)
62e64905 1198 {
4e276a89 1199 case RTD_UNICAST:
95616c82
OZ
1200 case RTD_BLACKHOLE:
1201 case RTD_UNREACHABLE:
1202 case RTD_PROHIBIT:
62e64905
OZ
1203 return 1;
1204
95616c82
OZ
1205 default:
1206 return 0;
62e64905 1207 }
95616c82
OZ
1208}
1209
1210static inline int
4e276a89 1211nh_bufsize(struct nexthop *nh)
95616c82
OZ
1212{
1213 int rv = 0;
1214 for (; nh != NULL; nh = nh->next)
9fdf9d29 1215 rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr)));
95616c82
OZ
1216 return rv;
1217}
1218
1219static int
13c0be19 1220nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh)
95616c82
OZ
1221{
1222 eattr *ea;
1223 net *net = e->net;
1224 rta *a = e->attrs;
13c0be19 1225 ea_list *eattrs = a->eattrs;
4e276a89 1226 int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(&(a->nh));
4adcb9df 1227 u32 priority = 0;
a8caff32 1228
95616c82
OZ
1229 struct {
1230 struct nlmsghdr h;
1231 struct rtmsg r;
a8caff32
JMM
1232 char buf[0];
1233 } *r;
1234
1235 int rsize = sizeof(*r) + bufsize;
1236 r = alloca(rsize);
95616c82 1237
cc5b93f7 1238 DBG("nl_send_route(%N,op=%x)\n", net->n.addr, op);
95616c82 1239
a8caff32
JMM
1240 bzero(&r->h, sizeof(r->h));
1241 bzero(&r->r, sizeof(r->r));
cc5b93f7 1242 r->h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE;
a8caff32 1243 r->h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
cc5b93f7 1244 r->h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK;
95616c82 1245
a8caff32
JMM
1246 r->r.rtm_family = p->af;
1247 r->r.rtm_dst_len = net_pxlen(net->n.addr);
1248 r->r.rtm_protocol = RTPROT_BIRD;
7074be22 1249 r->r.rtm_scope = RT_SCOPE_NOWHERE;
6b0f5f68 1250#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
1251 if (p->af == AF_MPLS)
1252 {
66acbc8d
OZ
1253 /*
1254 * Kernel MPLS code is a bit picky. We must:
1255 * 1) Always set RT_SCOPE_UNIVERSE and RTN_UNICAST (even for RTM_DELROUTE)
1256 * 2) Never use RTA_PRIORITY
1257 */
1258
d14f8c3c
JMM
1259 u32 label = net_mpls(net->n.addr);
1260 nl_add_attr_mpls(&r->h, rsize, RTA_DST, 1, &label);
66acbc8d
OZ
1261 r->r.rtm_scope = RT_SCOPE_UNIVERSE;
1262 r->r.rtm_type = RTN_UNICAST;
d14f8c3c
JMM
1263 }
1264 else
6b0f5f68 1265#endif
be17805c 1266 {
d14f8c3c 1267 nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(net->n.addr));
95616c82 1268
be17805c
OZ
1269 /* Add source address for IPv6 SADR routes */
1270 if (net->n.addr->type == NET_IP6_SADR)
1271 {
1272 net_addr_ip6_sadr *a = (void *) &net->n.addr;
1273 nl_add_attr_ip6(&r->h, rsize, RTA_SRC, a->src_prefix);
1274 r->r.rtm_src_len = a->src_pxlen;
1275 }
1276 }
1277
2feaa693
OZ
1278 /*
1279 * Strange behavior for RTM_DELROUTE:
1280 * 1) rtm_family is ignored in IPv6, works for IPv4
1281 * 2) not setting RTA_PRIORITY is different from setting default value (on IPv6)
1282 * 3) not setting RTA_PRIORITY is equivalent to setting 0, which is wildcard
1283 */
1284
9ddbfbdd 1285 if (krt_table_id(p) < 256)
a8caff32 1286 r->r.rtm_table = krt_table_id(p);
9ddbfbdd 1287 else
a8caff32 1288 nl_add_attr_u32(&r->h, rsize, RTA_TABLE, krt_table_id(p));
9ddbfbdd 1289
66acbc8d
OZ
1290 if (p->af == AF_MPLS)
1291 priority = 0;
1292 else if (a->source == RTS_DUMMY)
4adcb9df
OZ
1293 priority = e->u.krt.metric;
1294 else if (KRT_CF->sys.metric)
1295 priority = KRT_CF->sys.metric;
1296 else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC)))
1297 priority = ea->u.data;
78a2cc28 1298
4adcb9df 1299 if (priority)
d1b8fe93 1300 nl_add_attr_u32(&r->h, rsize, RTA_PRIORITY, priority);
78a2cc28 1301
2feaa693
OZ
1302 /* For route delete, we do not specify remaining route attributes */
1303 if (op == NL_OP_DELETE)
1304 goto dest;
78a2cc28 1305
6e75d0d2 1306 /* Default scope is LINK for device routes, UNIVERSE otherwise */
66acbc8d
OZ
1307 if (p->af == AF_MPLS)
1308 r->r.rtm_scope = RT_SCOPE_UNIVERSE;
1309 else if (ea = ea_find(eattrs, EA_KRT_SCOPE))
cc5b93f7 1310 r->r.rtm_scope = ea->u.data;
6e75d0d2 1311 else
4e276a89 1312 r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
95616c82
OZ
1313
1314 if (ea = ea_find(eattrs, EA_KRT_PREFSRC))
a8caff32 1315 nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data);
95616c82
OZ
1316
1317 if (ea = ea_find(eattrs, EA_KRT_REALM))
a8caff32 1318 nl_add_attr_u32(&r->h, rsize, RTA_FLOW, ea->u.data);
95616c82 1319
9fdf9d29
OZ
1320
1321 u32 metrics[KRT_METRICS_MAX];
1322 metrics[0] = 0;
1323
1324 struct ea_walk_state ews = { .eattrs = eattrs };
1325 while (ea = ea_walk(&ews, EA_KRT_METRICS, KRT_METRICS_MAX))
1326 {
1327 int id = ea->id - EA_KRT_METRICS;
1328 metrics[0] |= 1 << id;
1329 metrics[id] = ea->u.data;
1330 }
1331
1332 if (metrics[0])
a8caff32 1333 nl_add_metrics(&r->h, rsize, metrics, KRT_METRICS_MAX);
9fdf9d29
OZ
1334
1335
2feaa693 1336dest:
2feaa693 1337 switch (dest)
95616c82 1338 {
4e276a89 1339 case RTD_UNICAST:
a8caff32 1340 r->r.rtm_type = RTN_UNICAST;
4e276a89 1341 if (nh->next && !krt_ecmp6(p))
d14f8c3c 1342 nl_add_multipath(&r->h, rsize, nh, p->af);
4e276a89
JMM
1343 else
1344 {
1345 nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->iface->index);
d14f8c3c 1346 nl_add_nexthop(&r->h, rsize, nh, p->af);
a1f5e514
OZ
1347
1348 if (nh->flags & RNF_ONLINK)
1349 r->r.rtm_flags |= RTNH_F_ONLINK;
4e276a89 1350 }
95616c82
OZ
1351 break;
1352 case RTD_BLACKHOLE:
a8caff32 1353 r->r.rtm_type = RTN_BLACKHOLE;
95616c82
OZ
1354 break;
1355 case RTD_UNREACHABLE:
a8caff32 1356 r->r.rtm_type = RTN_UNREACHABLE;
95616c82
OZ
1357 break;
1358 case RTD_PROHIBIT:
a8caff32 1359 r->r.rtm_type = RTN_PROHIBIT;
95616c82 1360 break;
2feaa693
OZ
1361 case RTD_NONE:
1362 break;
95616c82
OZ
1363 default:
1364 bug("krt_capable inconsistent with nl_send_route");
1365 }
1366
2feaa693 1367 /* Ignore missing for DELETE */
cc5b93f7 1368 return nl_exchange(&r->h, (op == NL_OP_DELETE));
2feaa693
OZ
1369}
1370
1371static inline int
13c0be19 1372nl_add_rte(struct krt_proto *p, rte *e)
2feaa693
OZ
1373{
1374 rta *a = e->attrs;
1375 int err = 0;
1376
4e276a89 1377 if (krt_ecmp6(p) && a->nh.next)
2feaa693 1378 {
4e276a89 1379 struct nexthop *nh = &(a->nh);
2feaa693 1380
13c0be19 1381 err = nl_send_route(p, e, NL_OP_ADD, RTD_UNICAST, nh);
2feaa693
OZ
1382 if (err < 0)
1383 return err;
1384
1385 for (nh = nh->next; nh; nh = nh->next)
13c0be19 1386 err += nl_send_route(p, e, NL_OP_APPEND, RTD_UNICAST, nh);
2feaa693
OZ
1387
1388 return err;
1389 }
1390
13c0be19 1391 return nl_send_route(p, e, NL_OP_ADD, a->dest, &(a->nh));
2feaa693
OZ
1392}
1393
1394static inline int
13c0be19 1395nl_delete_rte(struct krt_proto *p, rte *e)
2feaa693
OZ
1396{
1397 int err = 0;
1398
1399 /* For IPv6, we just repeatedly request DELETE until we get error */
1400 do
13c0be19 1401 err = nl_send_route(p, e, NL_OP_DELETE, RTD_NONE, NULL);
2feaa693
OZ
1402 while (krt_ecmp6(p) && !err);
1403
1404 return err;
95616c82
OZ
1405}
1406
8235c474
OZ
1407static inline int
1408nl_replace_rte(struct krt_proto *p, rte *e)
1409{
1410 rta *a = e->attrs;
1411 return nl_send_route(p, e, NL_OP_REPLACE, a->dest, &(a->nh));
1412}
1413
1414
95616c82 1415void
cc75b3e1 1416krt_replace_rte(struct krt_proto *p, net *n UNUSED, rte *new, rte *old)
95616c82
OZ
1417{
1418 int err = 0;
1419
1420 /*
8235c474
OZ
1421 * We use NL_OP_REPLACE for IPv4, it has an issue with not checking for
1422 * matching rtm_protocol, but that is OK when dedicated priority is used.
2feaa693 1423 *
8235c474
OZ
1424 * We do not use NL_OP_REPLACE for IPv6, as it has broken semantics for ECMP
1425 * and with some kernel versions ECMP replace crashes kernel. Would need more
1426 * testing and checks for kernel versions.
2feaa693 1427 *
8235c474
OZ
1428 * For IPv6, we use NL_OP_DELETE and then NL_OP_ADD. We also do not trust the
1429 * old route value, so we do not try to optimize IPv6 ECMP reconfigurations.
95616c82
OZ
1430 */
1431
8235c474
OZ
1432 if (krt_ipv4(p) && old && new)
1433 {
1434 err = nl_replace_rte(p, new);
1435 }
1436 else
1437 {
1438 if (old)
1439 nl_delete_rte(p, old);
95616c82 1440
8235c474
OZ
1441 if (new)
1442 err = nl_add_rte(p, new);
1443 }
95616c82 1444
cc75b3e1
OZ
1445 if (new)
1446 {
1447 if (err < 0)
1448 bmap_clear(&p->sync_map, new->id);
1449 else
1450 bmap_set(&p->sync_map, new->id);
1451 }
95616c82
OZ
1452}
1453
2feaa693 1454static int
1187627a 1455nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint priority, uint krt_type, uint rtm_family)
2feaa693 1456{
1187627a
OZ
1457 /* Route merging is used for IPv6 scans */
1458 if (!s->scan || (rtm_family != AF_INET6))
2feaa693
OZ
1459 return 0;
1460
1461 /* Saved and new route must have same network, proto/table, and priority */
1462 if ((s->net != net) || (s->proto != p) || (s->krt_metric != priority))
1463 return 0;
1464
1465 /* Both must be regular unicast routes */
1466 if ((s->krt_type != RTN_UNICAST) || (krt_type != RTN_UNICAST))
1467 return 0;
1468
1469 return 1;
1470}
1471
1472static void
1473nl_announce_route(struct nl_parse_state *s)
1474{
1475 rte *e = rte_get_temp(s->attrs);
1476 e->net = s->net;
1477 e->u.krt.src = s->krt_src;
1478 e->u.krt.proto = s->krt_proto;
1479 e->u.krt.seen = 0;
1480 e->u.krt.best = 0;
1481 e->u.krt.metric = s->krt_metric;
1482
1483 if (s->scan)
1484 krt_got_route(s->proto, e);
1485 else
1486 krt_got_route_async(s->proto, e, s->new);
1487
1488 s->net = NULL;
1489 s->attrs = NULL;
1490 s->proto = NULL;
1491 lp_flush(s->pool);
1492}
1493
1494static inline void
1187627a 1495nl_parse_begin(struct nl_parse_state *s, int scan)
2feaa693
OZ
1496{
1497 memset(s, 0, sizeof (struct nl_parse_state));
1498 s->pool = nl_linpool;
1499 s->scan = scan;
2feaa693
OZ
1500}
1501
1502static inline void
1503nl_parse_end(struct nl_parse_state *s)
1504{
1505 if (s->net)
1506 nl_announce_route(s);
1507}
1508
1509
95616c82
OZ
1510#define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
1511
1512static void
2feaa693 1513nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
95616c82
OZ
1514{
1515 struct krt_proto *p;
1516 struct rtmsg *i;
ad276157 1517 struct rtattr *a[BIRD_RTA_MAX];
95616c82
OZ
1518 int new = h->nlmsg_type == RTM_NEWROUTE;
1519
be17805c 1520 net_addr dst, src = {};
95616c82 1521 u32 oif = ~0;
29a64162 1522 u32 table_id;
2feaa693 1523 u32 priority = 0;
6e75d0d2 1524 u32 def_scope = RT_SCOPE_UNIVERSE;
be17805c 1525 int krt_src;
95616c82 1526
ad276157 1527 if (!(i = nl_checkin(h, sizeof(*i))))
95616c82 1528 return;
ad276157
JMM
1529
1530 switch (i->rtm_family)
95616c82 1531 {
29a64162
OZ
1532 case AF_INET:
1533 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want4, a, sizeof(a)))
1534 return;
1535
1536 if (a[RTA_DST])
1537 net_fill_ip4(&dst, rta_get_ip4(a[RTA_DST]), i->rtm_dst_len);
1538 else
1539 net_fill_ip4(&dst, IP4_NONE, 0);
1540 break;
1541
cc5b93f7
OZ
1542 case AF_INET6:
1543 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a)))
1544 return;
29a64162
OZ
1545
1546 if (a[RTA_DST])
1547 net_fill_ip6(&dst, rta_get_ip6(a[RTA_DST]), i->rtm_dst_len);
1548 else
1549 net_fill_ip6(&dst, IP6_NONE, 0);
be17805c
OZ
1550
1551 if (a[RTA_SRC])
1552 net_fill_ip6(&src, rta_get_ip6(a[RTA_SRC]), i->rtm_src_len);
1553 else
1554 net_fill_ip6(&src, IP6_NONE, 0);
29a64162
OZ
1555 break;
1556
6b0f5f68 1557#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
1558 case AF_MPLS:
1559 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want_mpls, a, sizeof(a)))
1560 return;
1561
ed610044
OZ
1562 if (!a[RTA_DST])
1563 SKIP("MPLS route without RTA_DST");
1564
1565 if (rta_get_mpls(a[RTA_DST], rta_mpls_stack) != 1)
1566 SKIP("MPLS route with multi-label RTA_DST");
1567
1568 net_fill_mpls(&dst, rta_mpls_stack[0]);
d14f8c3c 1569 break;
6b0f5f68 1570#endif
d14f8c3c 1571
29a64162
OZ
1572 default:
1573 return;
95616c82
OZ
1574 }
1575
95616c82 1576 if (a[RTA_OIF])
acb04cfd 1577 oif = rta_get_u32(a[RTA_OIF]);
95616c82 1578
9ddbfbdd 1579 if (a[RTA_TABLE])
29a64162 1580 table_id = rta_get_u32(a[RTA_TABLE]);
9ddbfbdd 1581 else
29a64162 1582 table_id = i->rtm_table;
9ddbfbdd 1583
29a64162
OZ
1584 /* Do we know this table? */
1585 p = HASH_FIND(nl_table_map, RTH, i->rtm_family, table_id);
95616c82 1586 if (!p)
4659b2ae 1587 SKIP("unknown table %u\n", table_id);
95616c82 1588
be17805c
OZ
1589 if (a[RTA_SRC] && (p->p.net_type != NET_IP6_SADR))
1590 SKIP("src prefix for non-SADR channel\n");
1591
95616c82
OZ
1592 if (a[RTA_IIF])
1593 SKIP("IIF set\n");
29a64162 1594
95616c82
OZ
1595 if (i->rtm_tos != 0) /* We don't support TOS */
1596 SKIP("TOS %02x\n", i->rtm_tos);
95616c82 1597
2feaa693 1598 if (s->scan && !new)
95616c82
OZ
1599 SKIP("RTM_DELROUTE in scan\n");
1600
2feaa693
OZ
1601 if (a[RTA_PRIORITY])
1602 priority = rta_get_u32(a[RTA_PRIORITY]);
1603
9b136840 1604 int c = net_classify(&dst);
95616c82
OZ
1605 if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
1606 SKIP("strange class/scope\n");
1607
95616c82
OZ
1608 switch (i->rtm_protocol)
1609 {
1610 case RTPROT_UNSPEC:
1611 SKIP("proto unspec\n");
1612
1613 case RTPROT_REDIRECT:
be17805c 1614 krt_src = KRT_SRC_REDIRECT;
95616c82
OZ
1615 break;
1616
1617 case RTPROT_KERNEL:
be17805c 1618 krt_src = KRT_SRC_KERNEL;
95616c82
OZ
1619 return;
1620
1621 case RTPROT_BIRD:
2feaa693 1622 if (!s->scan)
95616c82 1623 SKIP("echo\n");
be17805c 1624 krt_src = KRT_SRC_BIRD;
95616c82
OZ
1625 break;
1626
1627 case RTPROT_BOOT:
1628 default:
be17805c 1629 krt_src = KRT_SRC_ALIEN;
95616c82
OZ
1630 }
1631
be17805c
OZ
1632 net_addr *n = &dst;
1633 if (p->p.net_type == NET_IP6_SADR)
1634 {
1635 n = alloca(sizeof(net_addr_ip6_sadr));
1636 net_fill_ip6_sadr(n, net6_prefix(&dst), net6_pxlen(&dst),
1637 net6_prefix(&src), net6_pxlen(&src));
1638 }
1639
1640 net *net = net_get(p->p.main_channel->table, n);
95616c82 1641
1187627a 1642 if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type, i->rtm_family))
2feaa693
OZ
1643 nl_announce_route(s);
1644
d14f8c3c 1645 rta *ra = lp_allocz(s->pool, RTA_MAX_SIZE);
2feaa693
OZ
1646 ra->src = p->p.main_source;
1647 ra->source = RTS_INHERIT;
1648 ra->scope = SCOPE_UNIVERSE;
95616c82
OZ
1649
1650 switch (i->rtm_type)
1651 {
1652 case RTN_UNICAST:
62e64905 1653 ra->dest = RTD_UNICAST;
95616c82 1654
98bb80a2 1655 if (a[RTA_MULTIPATH])
4ff15a75 1656 {
3e792350 1657 struct nexthop *nh = nl_parse_multipath(s, p, a[RTA_MULTIPATH], i->rtm_family);
4e276a89 1658 if (!nh)
95616c82 1659 {
fe9f1a6d 1660 log(L_ERR "KRT: Received strange multipath route %N", net->n.addr);
95616c82
OZ
1661 return;
1662 }
9fdf9d29 1663
2eaf65ec 1664 nexthop_link(ra, nh);
95616c82
OZ
1665 break;
1666 }
1667
4e276a89
JMM
1668 ra->nh.iface = if_find_by_index(oif);
1669 if (!ra->nh.iface)
95616c82 1670 {
fe9f1a6d 1671 log(L_ERR "KRT: Received route %N with unknown ifindex %u", net->n.addr, oif);
95616c82
OZ
1672 return;
1673 }
1674
53401bef
OZ
1675 if (a[RTA_GATEWAY])
1676 ra->nh.gw = rta_get_ipa(a[RTA_GATEWAY]);
1677
6b0f5f68 1678#ifdef HAVE_MPLS_KERNEL
53401bef
OZ
1679 if (a[RTA_VIA])
1680 ra->nh.gw = rta_get_via(a[RTA_VIA]);
6b0f5f68 1681#endif
95616c82 1682
53401bef
OZ
1683 if (ipa_nonzero(ra->nh.gw))
1684 {
95616c82 1685 /* Silently skip strange 6to4 routes */
0bf95f99 1686 const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96);
4e276a89 1687 if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->nh.gw, (net_addr *) &sit))
95616c82
OZ
1688 return;
1689
a1f5e514
OZ
1690 if (i->rtm_flags & RTNH_F_ONLINK)
1691 ra->nh.flags |= RNF_ONLINK;
1692
23c212e7 1693 neighbor *nbr;
586c1800
OZ
1694 nbr = neigh_find(&p->p, ra->nh.gw, ra->nh.iface,
1695 (ra->nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0);
23c212e7 1696 if (!nbr || (nbr->scope == SCOPE_HOST))
95616c82 1697 {
4e276a89
JMM
1698 log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr,
1699 ra->nh.gw);
95616c82
OZ
1700 return;
1701 }
1702 }
95616c82
OZ
1703
1704 break;
1705 case RTN_BLACKHOLE:
2feaa693 1706 ra->dest = RTD_BLACKHOLE;
95616c82
OZ
1707 break;
1708 case RTN_UNREACHABLE:
2feaa693 1709 ra->dest = RTD_UNREACHABLE;
95616c82
OZ
1710 break;
1711 case RTN_PROHIBIT:
2feaa693 1712 ra->dest = RTD_PROHIBIT;
95616c82
OZ
1713 break;
1714 /* FIXME: What about RTN_THROW? */
1715 default:
1716 SKIP("type %d\n", i->rtm_type);
1717 return;
1718 }
1719
6b0f5f68 1720#ifdef HAVE_MPLS_KERNEL
d14f8c3c 1721 if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !ra->nh.next)
2eaf65ec 1722 ra->nh.labels = rta_get_mpls(a[RTA_NEWDST], ra->nh.label);
d14f8c3c
JMM
1723
1724 if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !ra->nh.next)
1725 {
1726 switch (rta_get_u16(a[RTA_ENCAP_TYPE]))
1727 {
1728 case LWTUNNEL_ENCAP_MPLS:
1729 {
1730 struct rtattr *enca[BIRD_RTA_MAX];
1731 nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
1732 nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
2eaf65ec 1733 ra->nh.labels = rta_get_mpls(enca[RTA_DST], ra->nh.label);
d14f8c3c
JMM
1734 break;
1735 }
1736 default:
1737 SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE]));
1738 break;
1739 }
1740 }
6b0f5f68 1741#endif
d14f8c3c 1742
6e75d0d2
OZ
1743 if (i->rtm_scope != def_scope)
1744 {
1745 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1746 ea->next = ra->eattrs;
1747 ra->eattrs = ea;
1748 ea->flags = EALF_SORTED;
1749 ea->count = 1;
1750 ea->attrs[0].id = EA_KRT_SCOPE;
1751 ea->attrs[0].flags = 0;
1752 ea->attrs[0].type = EAF_TYPE_INT;
1753 ea->attrs[0].u.data = i->rtm_scope;
1754 }
95616c82
OZ
1755
1756 if (a[RTA_PREFSRC])
1757 {
9b136840 1758 ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]);
95616c82 1759
2feaa693
OZ
1760 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1761 ea->next = ra->eattrs;
1762 ra->eattrs = ea;
95616c82
OZ
1763 ea->flags = EALF_SORTED;
1764 ea->count = 1;
1765 ea->attrs[0].id = EA_KRT_PREFSRC;
1766 ea->attrs[0].flags = 0;
1767 ea->attrs[0].type = EAF_TYPE_IP_ADDRESS;
4c553c5a
MM
1768
1769 struct adata *ad = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps));
1770 ad->length = sizeof(ps);
1771 memcpy(ad->data, &ps, sizeof(ps));
1772
1773 ea->attrs[0].u.ptr = ad;
95616c82
OZ
1774 }
1775
1776 if (a[RTA_FLOW])
1777 {
2feaa693
OZ
1778 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1779 ea->next = ra->eattrs;
1780 ra->eattrs = ea;
95616c82
OZ
1781 ea->flags = EALF_SORTED;
1782 ea->count = 1;
1783 ea->attrs[0].id = EA_KRT_REALM;
1784 ea->attrs[0].flags = 0;
1785 ea->attrs[0].type = EAF_TYPE_INT;
acb04cfd 1786 ea->attrs[0].u.data = rta_get_u32(a[RTA_FLOW]);
95616c82
OZ
1787 }
1788
9fdf9d29
OZ
1789 if (a[RTA_METRICS])
1790 {
1791 u32 metrics[KRT_METRICS_MAX];
2feaa693 1792 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr));
9fdf9d29
OZ
1793 int t, n = 0;
1794
1795 if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0)
1796 {
fe9f1a6d 1797 log(L_ERR "KRT: Received route %N with strange RTA_METRICS attribute", net->n.addr);
9fdf9d29
OZ
1798 return;
1799 }
1800
1801 for (t = 1; t < KRT_METRICS_MAX; t++)
1802 if (metrics[0] & (1 << t))
1803 {
ee7e2ffd 1804 ea->attrs[n].id = EA_CODE(PROTOCOL_KERNEL, KRT_METRICS_OFFSET + t);
9fdf9d29
OZ
1805 ea->attrs[n].flags = 0;
1806 ea->attrs[n].type = EAF_TYPE_INT; /* FIXME: Some are EAF_TYPE_BITFIELD */
1807 ea->attrs[n].u.data = metrics[t];
1808 n++;
1809 }
1810
1811 if (n > 0)
1812 {
2feaa693 1813 ea->next = ra->eattrs;
9fdf9d29
OZ
1814 ea->flags = EALF_SORTED;
1815 ea->count = n;
2feaa693 1816 ra->eattrs = ea;
9fdf9d29
OZ
1817 }
1818 }
1819
2feaa693
OZ
1820 /*
1821 * Ideally, now we would send the received route to the rest of kernel code.
98bb80a2
OZ
1822 * But IPv6 ECMP routes before 4.11 are sent as a sequence of routes, so we
1823 * postpone it and merge next hops until the end of the sequence. Note that
3e792350
OZ
1824 * when doing merging of next hops, we expect the new route to be unipath.
1825 * Otherwise, we ignore additional next hops in nexthop_insert().
2feaa693
OZ
1826 */
1827
1828 if (!s->net)
1829 {
1830 /* Store the new route */
1831 s->net = net;
1832 s->attrs = ra;
1833 s->proto = p;
1834 s->new = new;
be17805c 1835 s->krt_src = krt_src;
2feaa693
OZ
1836 s->krt_type = i->rtm_type;
1837 s->krt_proto = i->rtm_protocol;
1838 s->krt_metric = priority;
1839 }
95616c82 1840 else
2feaa693
OZ
1841 {
1842 /* Merge next hops with the stored route */
62e64905 1843 rta *oa = s->attrs;
2feaa693 1844
62e64905
OZ
1845 struct nexthop *nhs = &oa->nh;
1846 nexthop_insert(&nhs, &ra->nh);
1847
1848 /* Perhaps new nexthop is inserted at the first position */
1849 if (nhs == &ra->nh)
1850 {
1851 /* Swap rtas */
1852 s->attrs = ra;
1853
1854 /* Keep old eattrs */
1855 ra->eattrs = oa->eattrs;
1856 }
2feaa693 1857 }
95616c82
OZ
1858}
1859
1860void
1861krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
1862{
1863 struct nlmsghdr *h;
2feaa693 1864 struct nl_parse_state s;
95616c82 1865
1187627a
OZ
1866 nl_parse_begin(&s, 1);
1867 nl_request_dump(AF_UNSPEC, RTM_GETROUTE);
95616c82
OZ
1868 while (h = nl_get_scan())
1869 if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
2feaa693 1870 nl_parse_route(&s, h);
95616c82
OZ
1871 else
1872 log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
cc5b93f7 1873 nl_parse_end(&s);
95616c82
OZ
1874}
1875
1876/*
1877 * Asynchronous Netlink interface
1878 */
1879
1880static sock *nl_async_sk; /* BIRD socket for asynchronous notifications; created once by nl_open_async() */
1881static byte *nl_async_rx_buffer; /* Receive buffer of NL_RX_SIZE bytes, filled by nl_async_hook() */
1882
1883static void
1884nl_async_msg(struct nlmsghdr *h)
1885{
2feaa693
OZ
1886 struct nl_parse_state s;
1887
95616c82
OZ
1888 switch (h->nlmsg_type)
1889 {
1890 case RTM_NEWROUTE:
1891 case RTM_DELROUTE:
1892 DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);
1187627a 1893 nl_parse_begin(&s, 0);
2feaa693
OZ
1894 nl_parse_route(&s, h);
1895 nl_parse_end(&s);
95616c82
OZ
1896 break;
1897 case RTM_NEWLINK:
1898 case RTM_DELLINK:
1899 DBG("KRT: Received async link notification (%d)\n", h->nlmsg_type);
1e4891e4
OZ
1900 if (kif_proto)
1901 nl_parse_link(h, 0);
95616c82
OZ
1902 break;
1903 case RTM_NEWADDR:
1904 case RTM_DELADDR:
1905 DBG("KRT: Received async address notification (%d)\n", h->nlmsg_type);
1e4891e4
OZ
1906 if (kif_proto)
1907 nl_parse_addr(h, 0);
95616c82
OZ
1908 break;
1909 default:
1910 DBG("KRT: Received unknown async notification (%d)\n", h->nlmsg_type);
1911 }
1912}
1913
1914static int
3e236955 1915nl_async_hook(sock *sk, uint size UNUSED)
95616c82
OZ
1916{
1917 struct iovec iov = { nl_async_rx_buffer, NL_RX_SIZE };
1918 struct sockaddr_nl sa;
31e9e101
ST
1919 struct msghdr m = {
1920 .msg_name = &sa,
1921 .msg_namelen = sizeof(sa),
1922 .msg_iov = &iov,
1923 .msg_iovlen = 1,
1924 };
95616c82
OZ
1925 struct nlmsghdr *h;
1926 int x;
ae80a2de 1927 uint len;
95616c82
OZ
1928
1929 x = recvmsg(sk->fd, &m, 0);
1930 if (x < 0)
1931 {
1932 if (errno == ENOBUFS)
1933 {
1934 /*
1935 * Netlink reports some packets have been thrown away.
1936 * One day we might react to it by asking for route table
1937 * scan in near future.
1938 */
2c33da50 1939 log(L_WARN "Kernel dropped some netlink messages, will resync on next scan.");
95616c82
OZ
1940 return 1; /* More data are likely to be ready */
1941 }
1942 else if (errno != EWOULDBLOCK)
1943 log(L_ERR "Netlink recvmsg: %m");
1944 return 0;
1945 }
1946 if (sa.nl_pid) /* It isn't from the kernel */
1947 {
1948 DBG("Non-kernel packet\n");
1949 return 1;
1950 }
1951 h = (void *) nl_async_rx_buffer;
1952 len = x;
1953 if (m.msg_flags & MSG_TRUNC)
1954 {
1955 log(L_WARN "Netlink got truncated asynchronous message");
1956 return 1;
1957 }
1958 while (NLMSG_OK(h, len))
1959 {
1960 nl_async_msg(h);
1961 h = NLMSG_NEXT(h, len);
1962 }
1963 if (len)
1964 log(L_WARN "nl_async_hook: Found packet remnant of size %d", len);
1965 return 1;
1966}
1967
ccd2a3ed
JMM
1968static void
1969nl_async_err_hook(sock *sk, int e UNUSED)
1970{
1971 nl_async_hook(sk, 0);
1972}
1973
95616c82
OZ
1974static void
1975nl_open_async(void)
1976{
1977 sock *sk;
1978 struct sockaddr_nl sa;
1979 int fd;
95616c82 1980
f83ce94d 1981 if (nl_async_sk)
95616c82 1982 return;
95616c82
OZ
1983
1984 DBG("KRT: Opening async netlink socket\n");
1985
1986 fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
1987 if (fd < 0)
1988 {
1989 log(L_ERR "Unable to open asynchronous rtnetlink socket: %m");
1990 return;
1991 }
1992
1993 bzero(&sa, sizeof(sa));
1994 sa.nl_family = AF_NETLINK;
29a64162
OZ
1995 sa.nl_groups = RTMGRP_LINK |
1996 RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE |
1997 RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE;
1998
95616c82
OZ
1999 if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
2000 {
2001 log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m");
f83ce94d 2002 close(fd);
95616c82
OZ
2003 return;
2004 }
2005
f83ce94d
OZ
2006 nl_async_rx_buffer = xmalloc(NL_RX_SIZE);
2007
95616c82
OZ
2008 sk = nl_async_sk = sk_new(krt_pool);
2009 sk->type = SK_MAGIC;
2010 sk->rx_hook = nl_async_hook;
ccd2a3ed 2011 sk->err_hook = nl_async_err_hook;
95616c82 2012 sk->fd = fd;
05476c4d 2013 if (sk_open(sk) < 0)
95616c82 2014 bug("Netlink: sk_open failed");
95616c82
OZ
2015}
2016
9ddbfbdd 2017
95616c82
OZ
2018/*
2019 * Interface to the UNIX krt module
2020 */
2021
95616c82 2022void
9ddbfbdd
JMM
2023krt_sys_io_init(void)
2024{
05d47bd5 2025 nl_linpool = lp_new_default(krt_pool);
9ddbfbdd
JMM
2026 HASH_INIT(nl_table_map, krt_pool, 6);
2027}
2028
2029int
c6964c30 2030krt_sys_start(struct krt_proto *p)
95616c82 2031{
29a64162 2032 struct krt_proto *old = HASH_FIND(nl_table_map, RTH, p->af, krt_table_id(p));
9ddbfbdd
JMM
2033
2034 if (old)
2035 {
2036 log(L_ERR "%s: Kernel table %u already registered by %s",
2037 p->p.name, krt_table_id(p), old->p.name);
2038 return 0;
2039 }
2040
2041 HASH_INSERT2(nl_table_map, RTH, krt_pool, p);
c6964c30
OZ
2042
2043 nl_open();
2044 nl_open_async();
9ddbfbdd
JMM
2045
2046 return 1;
95616c82
OZ
2047}
2048
2049void
9ddbfbdd 2050krt_sys_shutdown(struct krt_proto *p)
95616c82 2051{
9ddbfbdd 2052 HASH_REMOVE2(nl_table_map, RTH, krt_pool, p);
95616c82
OZ
2053}
2054
2055int
2056krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
2057{
4adcb9df 2058 return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric);
95616c82
OZ
2059}
2060
95616c82
OZ
2061void
2062krt_sys_init_config(struct krt_config *cf)
2063{
2064 cf->sys.table_id = RT_TABLE_MAIN;
bff21441 2065 cf->sys.metric = 32;
95616c82
OZ
2066}
2067
2068void
2069krt_sys_copy_config(struct krt_config *d, struct krt_config *s)
2070{
2071 d->sys.table_id = s->sys.table_id;
4adcb9df 2072 d->sys.metric = s->sys.metric;
95616c82
OZ
2073}
2074
9fdf9d29
OZ
2075static const char *krt_metrics_names[KRT_METRICS_MAX] = {
2076 NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss",
2077 "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack"
2078};
2079
2080static const char *krt_features_names[KRT_FEATURES_MAX] = {
2081 "ecn", NULL, NULL, "allfrag"
2082};
2083
2084int
258be565 2085krt_sys_get_attr(const eattr *a, byte *buf, int buflen UNUSED)
9fdf9d29
OZ
2086{
2087 switch (a->id)
2088 {
2089 case EA_KRT_PREFSRC:
2090 bsprintf(buf, "prefsrc");
2091 return GA_NAME;
2092
2093 case EA_KRT_REALM:
2094 bsprintf(buf, "realm");
2095 return GA_NAME;
2096
6e75d0d2
OZ
2097 case EA_KRT_SCOPE:
2098 bsprintf(buf, "scope");
2099 return GA_NAME;
2100
9fdf9d29
OZ
2101 case EA_KRT_LOCK:
2102 buf += bsprintf(buf, "lock:");
2103 ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX);
2104 return GA_FULL;
2105
2106 case EA_KRT_FEATURES:
2107 buf += bsprintf(buf, "features:");
2108 ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX);
2109 return GA_FULL;
2110
2111 default:;
2112 int id = (int)EA_ID(a->id) - KRT_METRICS_OFFSET;
2113 if (id > 0 && id < KRT_METRICS_MAX)
2114 {
2115 bsprintf(buf, "%s", krt_metrics_names[id]);
2116 return GA_NAME;
2117 }
2118
2119 return GA_UNKNOWN;
2120 }
2121}
2122
95616c82
OZ
2123
2124
2125void
2126kif_sys_start(struct kif_proto *p UNUSED)
2127{
2128 nl_open();
2129 nl_open_async();
2130}
2131
2132void
2133kif_sys_shutdown(struct kif_proto *p UNUSED)
2134{
2135}
153f02da
OZ
2136
2137int
2138kif_update_sysdep_addr(struct iface *i UNUSED)
2139{
2140 return 0;
2141}