]> git.ipfire.org Git - thirdparty/bird.git/blame - sysdep/linux/netlink.c
KRT: Fix debug messages in netlink code
[thirdparty/bird.git] / sysdep / linux / netlink.c
CommitLineData
95616c82
OZ
1/*
2 * BIRD -- Linux Netlink Interface
3 *
4 * (c) 1999--2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
a8caff32 9#include <alloca.h>
95616c82 10#include <stdio.h>
f83ce94d 11#include <unistd.h>
95616c82
OZ
12#include <fcntl.h>
13#include <sys/socket.h>
14#include <sys/uio.h>
15#include <errno.h>
16
17#undef LOCAL_DEBUG
18
19#include "nest/bird.h"
20#include "nest/route.h"
21#include "nest/protocol.h"
22#include "nest/iface.h"
4e276a89 23#include "lib/alloca.h"
7152e5ef
JMM
24#include "sysdep/unix/unix.h"
25#include "sysdep/unix/krt.h"
95616c82
OZ
26#include "lib/socket.h"
27#include "lib/string.h"
9ddbfbdd 28#include "lib/hash.h"
95616c82
OZ
29#include "conf/conf.h"
30
31#include <asm/types.h>
32#include <linux/if.h>
33#include <linux/netlink.h>
34#include <linux/rtnetlink.h>
35
6b0f5f68
MJM
36#ifdef HAVE_MPLS_KERNEL
37#include <linux/lwtunnel.h>
38#endif
9ddbfbdd 39
95616c82
OZ
40#ifndef MSG_TRUNC /* Hack: Several versions of glibc miss this one :( */
41#define MSG_TRUNC 0x20
42#endif
43
a08a81c6
OZ
44#ifndef IFA_FLAGS
45#define IFA_FLAGS 8
46#endif
47
95616c82
OZ
48#ifndef IFF_LOWER_UP
49#define IFF_LOWER_UP 0x10000
50#endif
51
9ddbfbdd
JMM
52#ifndef RTA_TABLE
53#define RTA_TABLE 15
54#endif
55
d14f8c3c
JMM
56#ifndef RTA_VIA
57#define RTA_VIA 18
58#endif
59
60#ifndef RTA_NEWDST
61#define RTA_NEWDST 19
62#endif
63
64#ifndef RTA_ENCAP_TYPE
65#define RTA_ENCAP_TYPE 21
66#endif
67
68#ifndef RTA_ENCAP
69#define RTA_ENCAP 22
70#endif
9ddbfbdd 71
cc5b93f7 72#define krt_ecmp6(p) ((p)->af == AF_INET6)
2feaa693 73
517d05df
OZ
74const int rt_default_ecmp = 16;
75
2feaa693
OZ
76/*
77 * Structure nl_parse_state keeps state of received route processing. Ideally,
78 * we could just independently parse received Netlink messages and immediately
98bb80a2
OZ
79 * propagate received routes to the rest of BIRD, but older Linux kernel (before
80 * version 4.11) represents and announces IPv6 ECMP routes not as one route with
81 * multiple next hops (like RTA_MULTIPATH in IPv4 ECMP), but as a sequence of
82 * routes with the same prefix. More recent kernels work as with IPv4.
2feaa693
OZ
83 *
84 * Therefore, BIRD keeps currently processed route in nl_parse_state structure
85 * and postpones its propagation until we expect it to be final; i.e., when
86 * non-matching route is received or when the scan ends. When another matching
87 * route is received, it is merged with the already processed route to form an
88 * ECMP route. Note that merging is done only for IPv6 (merge == 1), but the
98bb80a2
OZ
89 * postponing is done in both cases (for simplicity). All IPv4 routes or IPv6
90 * routes with RTA_MULTIPATH set are just considered non-matching.
2feaa693
OZ
91 *
92 * This is ignored for asynchronous notifications (every notification is handled
93 * as a separate route). It is not an issue for our routes, as we ignore such
94 * notifications anyways. But importing alien IPv6 ECMP routes does not work
98bb80a2
OZ
95 * properly with older kernels.
96 *
97 * Whatever the kernel version is, IPv6 ECMP routes are sent as multiple routes
98 * for the same prefix.
2feaa693
OZ
99 */
100
101struct nl_parse_state
102{
103 struct linpool *pool;
104 int scan;
105 int merge;
106
107 net *net;
108 rta *attrs;
109 struct krt_proto *proto;
110 s8 new;
111 s8 krt_src;
112 u8 krt_type;
113 u8 krt_proto;
114 u32 krt_metric;
115};
116
95616c82
OZ
117/*
118 * Synchronous Netlink interface
119 */
120
121struct nl_sock
122{
123 int fd;
124 u32 seq;
125 byte *rx_buffer; /* Receive buffer */
126 struct nlmsghdr *last_hdr; /* Recently received packet */
ae80a2de 127 uint last_size;
95616c82
OZ
128};
129
130#define NL_RX_SIZE 8192
131
2feaa693
OZ
132#define NL_OP_DELETE 0
133#define NL_OP_ADD (NLM_F_CREATE|NLM_F_EXCL)
134#define NL_OP_REPLACE (NLM_F_CREATE|NLM_F_REPLACE)
135#define NL_OP_APPEND (NLM_F_CREATE|NLM_F_APPEND)
136
137static linpool *nl_linpool;
138
95616c82
OZ
139static struct nl_sock nl_scan = {.fd = -1}; /* Netlink socket for synchronous scan */
140static struct nl_sock nl_req = {.fd = -1}; /* Netlink socket for requests */
141
142static void
143nl_open_sock(struct nl_sock *nl)
144{
145 if (nl->fd < 0)
146 {
147 nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
148 if (nl->fd < 0)
149 die("Unable to open rtnetlink socket: %m");
574b2324 150 nl->seq = (u32) (current_time() TO_S); /* Or perhaps random_u32() ? */
95616c82
OZ
151 nl->rx_buffer = xmalloc(NL_RX_SIZE);
152 nl->last_hdr = NULL;
153 nl->last_size = 0;
154 }
155}
156
157static void
158nl_open(void)
159{
160 nl_open_sock(&nl_scan);
161 nl_open_sock(&nl_req);
162}
163
164static void
165nl_send(struct nl_sock *nl, struct nlmsghdr *nh)
166{
167 struct sockaddr_nl sa;
168
169 memset(&sa, 0, sizeof(sa));
170 sa.nl_family = AF_NETLINK;
171 nh->nlmsg_pid = 0;
172 nh->nlmsg_seq = ++(nl->seq);
173 if (sendto(nl->fd, nh, nh->nlmsg_len, 0, (struct sockaddr *)&sa, sizeof(sa)) < 0)
174 die("rtnetlink sendto: %m");
175 nl->last_hdr = NULL;
176}
177
178static void
86c3eea0 179nl_request_dump(int af, int cmd)
95616c82
OZ
180{
181 struct {
182 struct nlmsghdr nh;
183 struct rtgenmsg g;
641172c6
OZ
184 } req = {
185 .nh.nlmsg_type = cmd,
186 .nh.nlmsg_len = sizeof(req),
187 .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
188 .g.rtgen_family = af
189 };
95616c82
OZ
190 nl_send(&nl_scan, &req.nh);
191}
192
193static struct nlmsghdr *
194nl_get_reply(struct nl_sock *nl)
195{
196 for(;;)
197 {
198 if (!nl->last_hdr)
199 {
200 struct iovec iov = { nl->rx_buffer, NL_RX_SIZE };
201 struct sockaddr_nl sa;
31e9e101
ST
202 struct msghdr m = {
203 .msg_name = &sa,
204 .msg_namelen = sizeof(sa),
205 .msg_iov = &iov,
206 .msg_iovlen = 1,
207 };
95616c82
OZ
208 int x = recvmsg(nl->fd, &m, 0);
209 if (x < 0)
210 die("nl_get_reply: %m");
211 if (sa.nl_pid) /* It isn't from the kernel */
212 {
213 DBG("Non-kernel packet\n");
214 continue;
215 }
216 nl->last_size = x;
217 nl->last_hdr = (void *) nl->rx_buffer;
218 if (m.msg_flags & MSG_TRUNC)
219 bug("nl_get_reply: got truncated reply which should be impossible");
220 }
221 if (NLMSG_OK(nl->last_hdr, nl->last_size))
222 {
223 struct nlmsghdr *h = nl->last_hdr;
224 nl->last_hdr = NLMSG_NEXT(h, nl->last_size);
225 if (h->nlmsg_seq != nl->seq)
226 {
227 log(L_WARN "nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)",
228 h->nlmsg_seq, nl->seq);
229 continue;
230 }
231 return h;
232 }
233 if (nl->last_size)
234 log(L_WARN "nl_get_reply: Found packet remnant of size %d", nl->last_size);
235 nl->last_hdr = NULL;
236 }
237}
238
1123e707 239static struct tbf rl_netlink_err = TBF_DEFAULT_LOG_LIMITS;
95616c82
OZ
240
241static int
2feaa693 242nl_error(struct nlmsghdr *h, int ignore_esrch)
95616c82
OZ
243{
244 struct nlmsgerr *e;
245 int ec;
246
247 if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr)))
248 {
249 log(L_WARN "Netlink: Truncated error message received");
250 return ENOBUFS;
251 }
252 e = (struct nlmsgerr *) NLMSG_DATA(h);
253 ec = -e->error;
2feaa693 254 if (ec && !(ignore_esrch && (ec == ESRCH)))
95616c82
OZ
255 log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec));
256 return ec;
257}
258
259static struct nlmsghdr *
260nl_get_scan(void)
261{
262 struct nlmsghdr *h = nl_get_reply(&nl_scan);
263
264 if (h->nlmsg_type == NLMSG_DONE)
265 return NULL;
266 if (h->nlmsg_type == NLMSG_ERROR)
267 {
2feaa693 268 nl_error(h, 0);
95616c82
OZ
269 return NULL;
270 }
271 return h;
272}
273
274static int
2feaa693 275nl_exchange(struct nlmsghdr *pkt, int ignore_esrch)
95616c82
OZ
276{
277 struct nlmsghdr *h;
278
279 nl_send(&nl_req, pkt);
280 for(;;)
281 {
282 h = nl_get_reply(&nl_req);
283 if (h->nlmsg_type == NLMSG_ERROR)
284 break;
285 log(L_WARN "nl_exchange: Unexpected reply received");
286 }
2feaa693 287 return nl_error(h, ignore_esrch) ? -1 : 0;
95616c82
OZ
288}
289
290/*
291 * Netlink attributes
292 */
293
294static int nl_attr_len;
295
296static void *
297nl_checkin(struct nlmsghdr *h, int lsize)
298{
299 nl_attr_len = h->nlmsg_len - NLMSG_LENGTH(lsize);
300 if (nl_attr_len < 0)
301 {
302 log(L_ERR "nl_checkin: underrun by %d bytes", -nl_attr_len);
303 return NULL;
304 }
305 return NLMSG_DATA(h);
306}
307
ad276157
JMM
308struct nl_want_attrs {
309 u8 defined:1;
310 u8 checksize:1;
311 u8 size;
312};
313
314
315#define BIRD_IFLA_MAX (IFLA_WIRELESS+1)
316
317static struct nl_want_attrs ifla_attr_want[BIRD_IFLA_MAX] = {
318 [IFLA_IFNAME] = { 1, 0, 0 },
319 [IFLA_MTU] = { 1, 1, sizeof(u32) },
943478b0 320 [IFLA_MASTER] = { 1, 1, sizeof(u32) },
ad276157
JMM
321 [IFLA_WIRELESS] = { 1, 0, 0 },
322};
323
29a64162 324
e37d2e3e 325#define BIRD_IFA_MAX (IFA_FLAGS+1)
ad276157 326
ad276157
JMM
327static struct nl_want_attrs ifa_attr_want4[BIRD_IFA_MAX] = {
328 [IFA_ADDRESS] = { 1, 1, sizeof(ip4_addr) },
329 [IFA_LOCAL] = { 1, 1, sizeof(ip4_addr) },
330 [IFA_BROADCAST] = { 1, 1, sizeof(ip4_addr) },
62e64905 331 [IFA_FLAGS] = { 1, 1, sizeof(u32) },
ad276157 332};
29a64162 333
ad276157
JMM
334static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = {
335 [IFA_ADDRESS] = { 1, 1, sizeof(ip6_addr) },
336 [IFA_LOCAL] = { 1, 1, sizeof(ip6_addr) },
e37d2e3e 337 [IFA_FLAGS] = { 1, 1, sizeof(u32) },
ad276157 338};
29a64162 339
ad276157 340
d14f8c3c 341#define BIRD_RTA_MAX (RTA_ENCAP+1)
ad276157 342
4e276a89 343static struct nl_want_attrs nexthop_attr_want4[BIRD_RTA_MAX] = {
ad276157 344 [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
d14f8c3c
JMM
345 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
346 [RTA_ENCAP] = { 1, 0, 0 },
347};
348
4ff15a75 349static struct nl_want_attrs nexthop_attr_want6[BIRD_RTA_MAX] = {
98bb80a2 350 [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) },
4ff15a75
OZ
351 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
352 [RTA_ENCAP] = { 1, 0, 0 },
353};
354
6b0f5f68 355#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
356static struct nl_want_attrs encap_mpls_want[BIRD_RTA_MAX] = {
357 [RTA_DST] = { 1, 0, 0 },
ad276157 358};
6b0f5f68 359#endif
ad276157 360
ad276157
JMM
361static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = {
362 [RTA_DST] = { 1, 1, sizeof(ip4_addr) },
363 [RTA_OIF] = { 1, 1, sizeof(u32) },
364 [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
365 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
366 [RTA_PREFSRC] = { 1, 1, sizeof(ip4_addr) },
367 [RTA_METRICS] = { 1, 0, 0 },
368 [RTA_MULTIPATH] = { 1, 0, 0 },
369 [RTA_FLOW] = { 1, 1, sizeof(u32) },
370 [RTA_TABLE] = { 1, 1, sizeof(u32) },
d14f8c3c
JMM
371 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
372 [RTA_ENCAP] = { 1, 0, 0 },
ad276157 373};
29a64162 374
ad276157
JMM
375static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = {
376 [RTA_DST] = { 1, 1, sizeof(ip6_addr) },
be17805c 377 [RTA_SRC] = { 1, 1, sizeof(ip6_addr) },
ad276157
JMM
378 [RTA_IIF] = { 1, 1, sizeof(u32) },
379 [RTA_OIF] = { 1, 1, sizeof(u32) },
380 [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) },
381 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
382 [RTA_PREFSRC] = { 1, 1, sizeof(ip6_addr) },
383 [RTA_METRICS] = { 1, 0, 0 },
98bb80a2 384 [RTA_MULTIPATH] = { 1, 0, 0 },
ad276157
JMM
385 [RTA_FLOW] = { 1, 1, sizeof(u32) },
386 [RTA_TABLE] = { 1, 1, sizeof(u32) },
d14f8c3c
JMM
387 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
388 [RTA_ENCAP] = { 1, 0, 0 },
389};
390
6b0f5f68 391#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
392static struct nl_want_attrs rtm_attr_want_mpls[BIRD_RTA_MAX] = {
393 [RTA_DST] = { 1, 1, sizeof(u32) },
394 [RTA_IIF] = { 1, 1, sizeof(u32) },
395 [RTA_OIF] = { 1, 1, sizeof(u32) },
396 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
397 [RTA_METRICS] = { 1, 0, 0 },
398 [RTA_FLOW] = { 1, 1, sizeof(u32) },
399 [RTA_TABLE] = { 1, 1, sizeof(u32) },
400 [RTA_VIA] = { 1, 0, 0 },
401 [RTA_NEWDST] = { 1, 0, 0 },
ad276157 402};
6b0f5f68 403#endif
ad276157
JMM
404
405
95616c82 406static int
ad276157 407nl_parse_attrs(struct rtattr *a, struct nl_want_attrs *want, struct rtattr **k, int ksize)
95616c82
OZ
408{
409 int max = ksize / sizeof(struct rtattr *);
410 bzero(k, ksize);
ad276157
JMM
411
412 for ( ; RTA_OK(a, nl_attr_len); a = RTA_NEXT(a, nl_attr_len))
95616c82 413 {
ad276157
JMM
414 if ((a->rta_type >= max) || !want[a->rta_type].defined)
415 continue;
416
417 if (want[a->rta_type].checksize && (RTA_PAYLOAD(a) != want[a->rta_type].size))
418 {
9b136840 419 log(L_ERR "nl_parse_attrs: Malformed attribute received");
ad276157
JMM
420 return 0;
421 }
422
423 k[a->rta_type] = a;
95616c82 424 }
ad276157 425
95616c82
OZ
426 if (nl_attr_len)
427 {
428 log(L_ERR "nl_parse_attrs: remnant of size %d", nl_attr_len);
429 return 0;
430 }
ad276157
JMM
431
432 return 1;
95616c82
OZ
433}
434
d14f8c3c
JMM
435static inline u16 rta_get_u16(struct rtattr *a)
436{ return *(u16 *) RTA_DATA(a); }
437
fce764f9 438static inline u32 rta_get_u32(struct rtattr *a)
acb04cfd
OZ
439{ return *(u32 *) RTA_DATA(a); }
440
441static inline ip4_addr rta_get_ip4(struct rtattr *a)
442{ return ip4_ntoh(*(ip4_addr *) RTA_DATA(a)); }
443
444static inline ip6_addr rta_get_ip6(struct rtattr *a)
445{ return ip6_ntoh(*(ip6_addr *) RTA_DATA(a)); }
446
9b136840
JMM
447static inline ip_addr rta_get_ipa(struct rtattr *a)
448{
449 if (RTA_PAYLOAD(a) == sizeof(ip4_addr))
450 return ipa_from_ip4(rta_get_ip4(a));
451 else
452 return ipa_from_ip6(rta_get_ip6(a));
453}
acb04cfd 454
6b0f5f68 455#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
456static inline ip_addr rta_get_via(struct rtattr *a)
457{
458 struct rtvia *v = RTA_DATA(a);
459 switch(v->rtvia_family) {
460 case AF_INET: return ipa_from_ip4(ip4_ntoh(*(ip4_addr *) v->rtvia_addr));
461 case AF_INET6: return ipa_from_ip6(ip6_ntoh(*(ip6_addr *) v->rtvia_addr));
462 }
463 return IPA_NONE;
464}
465
466static u32 rta_mpls_stack[MPLS_MAX_LABEL_STACK];
467static inline int rta_get_mpls(struct rtattr *a, u32 *stack)
468{
469 if (RTA_PAYLOAD(a) % 4)
470 log(L_WARN "KRT: Strange length of received MPLS stack: %u", RTA_PAYLOAD(a));
471
472 return mpls_get(RTA_DATA(a), RTA_PAYLOAD(a) & ~0x3, stack);
473}
6b0f5f68 474#endif
d14f8c3c 475
9fdf9d29
OZ
476struct rtattr *
477nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint dlen)
95616c82 478{
9fdf9d29
OZ
479 uint pos = NLMSG_ALIGN(h->nlmsg_len);
480 uint len = RTA_LENGTH(dlen);
95616c82
OZ
481
482 if (pos + len > bufsize)
483 bug("nl_add_attr: packet buffer overflow");
9fdf9d29
OZ
484
485 struct rtattr *a = (struct rtattr *)((char *)h + pos);
95616c82
OZ
486 a->rta_type = code;
487 a->rta_len = len;
488 h->nlmsg_len = pos + len;
9fdf9d29
OZ
489
490 if (dlen > 0)
491 memcpy(RTA_DATA(a), data, dlen);
492
493 return a;
95616c82
OZ
494}
495
d14f8c3c
JMM
496static inline struct rtattr *
497nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code)
498{
499 return nl_add_attr(h, bufsize, code, NULL, 0);
500}
501
/*
 * Finish a nested attribute: set the length of @a so that it spans from
 * its own start up to the (aligned) current end of message @h.
 */
static inline void
nl_close_attr(struct nlmsghdr *h, struct rtattr *a)
{
  char *msg_end = (char *) h + NLMSG_ALIGN(h->nlmsg_len);

  a->rta_len = msg_end - (char *) a;
}
507
508static inline void
509nl_add_attr_u16(struct nlmsghdr *h, uint bufsize, int code, u16 data)
510{
511 nl_add_attr(h, bufsize, code, &data, 2);
512}
513
95616c82 514static inline void
29a64162 515nl_add_attr_u32(struct nlmsghdr *h, uint bufsize, int code, u32 data)
95616c82
OZ
516{
517 nl_add_attr(h, bufsize, code, &data, 4);
518}
519
520static inline void
29a64162 521nl_add_attr_ip4(struct nlmsghdr *h, uint bufsize, int code, ip4_addr ip4)
95616c82 522{
29a64162
OZ
523 ip4 = ip4_hton(ip4);
524 nl_add_attr(h, bufsize, code, &ip4, sizeof(ip4));
525}
526
527static inline void
528nl_add_attr_ip6(struct nlmsghdr *h, uint bufsize, int code, ip6_addr ip6)
529{
530 ip6 = ip6_hton(ip6);
531 nl_add_attr(h, bufsize, code, &ip6, sizeof(ip6));
532}
533
534static inline void
535nl_add_attr_ipa(struct nlmsghdr *h, uint bufsize, int code, ip_addr ipa)
536{
537 if (ipa_is_ip4(ipa))
538 nl_add_attr_ip4(h, bufsize, code, ipa_to_ip4(ipa));
9b136840 539 else
29a64162 540 nl_add_attr_ip6(h, bufsize, code, ipa_to_ip6(ipa));
95616c82
OZ
541}
542
6b0f5f68 543#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
544static inline void
545nl_add_attr_mpls(struct nlmsghdr *h, uint bufsize, int code, int len, u32 *stack)
9fdf9d29 546{
d14f8c3c
JMM
547 char buf[len*4];
548 mpls_put(buf, len, stack);
549 nl_add_attr(h, bufsize, code, buf, len*4);
9fdf9d29 550}
95616c82
OZ
551
552static inline void
d14f8c3c 553nl_add_attr_mpls_encap(struct nlmsghdr *h, uint bufsize, int len, u32 *stack)
95616c82 554{
d14f8c3c
JMM
555 nl_add_attr_u16(h, bufsize, RTA_ENCAP_TYPE, LWTUNNEL_ENCAP_MPLS);
556
557 struct rtattr *nest = nl_open_attr(h, bufsize, RTA_ENCAP);
558 nl_add_attr_mpls(h, bufsize, RTA_DST, len, stack);
559 nl_close_attr(h, nest);
560}
561
562static inline void
563nl_add_attr_via(struct nlmsghdr *h, uint bufsize, ip_addr ipa)
564{
66acbc8d 565 struct rtvia *via = alloca(sizeof(struct rtvia) + 16);
d14f8c3c 566
62e64905
OZ
567 if (ipa_is_ip4(ipa))
568 {
d14f8c3c 569 via->rtvia_family = AF_INET;
62e64905 570 put_ip4(via->rtvia_addr, ipa_to_ip4(ipa));
66acbc8d 571 nl_add_attr(h, bufsize, RTA_VIA, via, sizeof(struct rtvia) + 4);
62e64905
OZ
572 }
573 else
574 {
d14f8c3c 575 via->rtvia_family = AF_INET6;
62e64905 576 put_ip6(via->rtvia_addr, ipa_to_ip6(ipa));
66acbc8d 577 nl_add_attr(h, bufsize, RTA_VIA, via, sizeof(struct rtvia) + 16);
d14f8c3c 578 }
95616c82 579}
6b0f5f68 580#endif
95616c82 581
9fdf9d29
OZ
582static inline struct rtnexthop *
583nl_open_nexthop(struct nlmsghdr *h, uint bufsize)
584{
585 uint pos = NLMSG_ALIGN(h->nlmsg_len);
586 uint len = RTNH_LENGTH(0);
587
588 if (pos + len > bufsize)
589 bug("nl_open_nexthop: packet buffer overflow");
590
591 h->nlmsg_len = pos + len;
592
593 return (void *)h + pos;
594}
595
/*
 * Finish a next hop entry: set its length so that it spans from @nh up to
 * the (aligned) current end of message @h.
 */
static inline void
nl_close_nexthop(struct nlmsghdr *h, struct rtnexthop *nh)
{
  char *msg_end = (char *) h + NLMSG_ALIGN(h->nlmsg_len);

  nh->rtnh_len = msg_end - (char *) nh;
}
95616c82 601
d14f8c3c 602static inline void
6b0f5f68 603nl_add_nexthop(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af UNUSED)
d14f8c3c 604{
6b0f5f68 605#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
606 if (nh->labels > 0)
607 if (af == AF_MPLS)
608 nl_add_attr_mpls(h, bufsize, RTA_NEWDST, nh->labels, nh->label);
609 else
610 nl_add_attr_mpls_encap(h, bufsize, nh->labels, nh->label);
611
612 if (ipa_nonzero(nh->gw))
613 if (af == AF_MPLS)
614 nl_add_attr_via(h, bufsize, nh->gw);
615 else
616 nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
6b0f5f68
MJM
617#else
618
619 if (ipa_nonzero(nh->gw))
620 nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
621#endif
d14f8c3c
JMM
622}
623
95616c82 624static void
d14f8c3c 625nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af)
95616c82 626{
9fdf9d29
OZ
627 struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH);
628
95616c82 629 for (; nh; nh = nh->next)
9fdf9d29
OZ
630 {
631 struct rtnexthop *rtnh = nl_open_nexthop(h, bufsize);
95616c82 632
9fdf9d29
OZ
633 rtnh->rtnh_flags = 0;
634 rtnh->rtnh_hops = nh->weight;
635 rtnh->rtnh_ifindex = nh->iface->index;
95616c82 636
d14f8c3c 637 nl_add_nexthop(h, bufsize, nh, af);
95616c82 638
a1f5e514
OZ
639 if (nh->flags & RNF_ONLINK)
640 rtnh->rtnh_flags |= RTNH_F_ONLINK;
641
9fdf9d29
OZ
642 nl_close_nexthop(h, rtnh);
643 }
644
645 nl_close_attr(h, a);
646}
95616c82 647
4e276a89 648static struct nexthop *
3e792350 649nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr *ra, int af)
95616c82 650{
ad276157 651 struct rtattr *a[BIRD_RTA_MAX];
95616c82 652 struct rtnexthop *nh = RTA_DATA(ra);
4e276a89 653 struct nexthop *rv, *first, **last;
3e236955 654 unsigned len = RTA_PAYLOAD(ra);
95616c82
OZ
655
656 first = NULL;
657 last = &first;
95616c82
OZ
658
659 while (len)
660 {
661 /* Use RTNH_OK(nh,len) ?? */
662 if ((len < sizeof(*nh)) || (len < nh->rtnh_len))
663 return NULL;
664
3e792350 665 *last = rv = lp_allocz(s->pool, NEXTHOP_MAX_SIZE);
95616c82
OZ
666 last = &(rv->next);
667
668 rv->weight = nh->rtnh_hops;
669 rv->iface = if_find_by_index(nh->rtnh_ifindex);
670 if (!rv->iface)
671 return NULL;
672
673 /* Nonexistent RTNH_PAYLOAD ?? */
674 nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0);
98bb80a2
OZ
675 switch (af)
676 {
98bb80a2 677 case AF_INET:
4ff15a75 678 if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want4, a, sizeof(a)))
98bb80a2
OZ
679 return NULL;
680 break;
4ff15a75 681
98bb80a2 682 case AF_INET6:
4ff15a75 683 if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want6, a, sizeof(a)))
98bb80a2
OZ
684 return NULL;
685 break;
4ff15a75 686
98bb80a2
OZ
687 default:
688 return NULL;
689 }
690
95616c82
OZ
691 if (a[RTA_GATEWAY])
692 {
23c212e7 693 rv->gw = rta_get_ipa(a[RTA_GATEWAY]);
95616c82 694
a1f5e514
OZ
695 if (nh->rtnh_flags & RTNH_F_ONLINK)
696 rv->flags |= RNF_ONLINK;
697
23c212e7 698 neighbor *nbr;
586c1800
OZ
699 nbr = neigh_find(&p->p, rv->gw, rv->iface,
700 (rv->flags & RNF_ONLINK) ? NEF_ONLINK : 0);
23c212e7 701 if (!nbr || (nbr->scope == SCOPE_HOST))
95616c82
OZ
702 return NULL;
703 }
704 else
d14f8c3c 705 rv->gw = IPA_NONE;
62e64905 706
6b0f5f68 707#ifdef HAVE_MPLS_KERNEL
d14f8c3c 708 if (a[RTA_ENCAP_TYPE])
6b0f5f68
MJM
709 {
710 if (rta_get_u16(a[RTA_ENCAP_TYPE]) != LWTUNNEL_ENCAP_MPLS) {
711 log(L_WARN "KRT: Unknown encapsulation method %d in multipath", rta_get_u16(a[RTA_ENCAP_TYPE]));
712 return NULL;
d14f8c3c
JMM
713 }
714
6b0f5f68
MJM
715 struct rtattr *enca[BIRD_RTA_MAX];
716 nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
717 nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
718 rv->labels = rta_get_mpls(enca[RTA_DST], rv->label);
719 break;
720 }
721#endif
722
95616c82
OZ
723
724 len -= NLMSG_ALIGN(nh->rtnh_len);
725 nh = RTNH_NEXT(nh);
726 }
727
728 return first;
729}
730
9fdf9d29
OZ
731static void
732nl_add_metrics(struct nlmsghdr *h, uint bufsize, u32 *metrics, int max)
733{
734 struct rtattr *a = nl_open_attr(h, bufsize, RTA_METRICS);
735 int t;
736
737 for (t = 1; t < max; t++)
738 if (metrics[0] & (1 << t))
739 nl_add_attr_u32(h, bufsize, t, metrics[t]);
740
741 nl_close_attr(h, a);
742}
743
744static int
745nl_parse_metrics(struct rtattr *hdr, u32 *metrics, int max)
746{
747 struct rtattr *a = RTA_DATA(hdr);
748 int len = RTA_PAYLOAD(hdr);
749
750 metrics[0] = 0;
751 for (; RTA_OK(a, len); a = RTA_NEXT(a, len))
752 {
753 if (a->rta_type == RTA_UNSPEC)
754 continue;
755
756 if (a->rta_type >= max)
757 continue;
758
759 if (RTA_PAYLOAD(a) != 4)
760 return -1;
761
762 metrics[0] |= 1 << a->rta_type;
acb04cfd 763 metrics[a->rta_type] = rta_get_u32(a);
9fdf9d29
OZ
764 }
765
766 if (len > 0)
767 return -1;
768
769 return 0;
770}
771
95616c82
OZ
772
773/*
774 * Scanning of interfaces
775 */
776
777static void
778nl_parse_link(struct nlmsghdr *h, int scan)
779{
780 struct ifinfomsg *i;
ad276157 781 struct rtattr *a[BIRD_IFLA_MAX];
95616c82
OZ
782 int new = h->nlmsg_type == RTM_NEWLINK;
783 struct iface f = {};
784 struct iface *ifi;
785 char *name;
943478b0 786 u32 mtu, master = 0;
ae80a2de 787 uint fl;
95616c82 788
ad276157 789 if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFLA_RTA(i), ifla_attr_want, a, sizeof(a)))
95616c82 790 return;
ad276157 791 if (!a[IFLA_IFNAME] || (RTA_PAYLOAD(a[IFLA_IFNAME]) < 2) || !a[IFLA_MTU])
95616c82 792 {
ad276157
JMM
793 /*
794 * IFLA_IFNAME and IFLA_MTU are required, in fact, but there may also come
795 * a message with IFLA_WIRELESS set, where (e.g.) no IFLA_IFNAME exists.
796 * We simply ignore all such messages with IFLA_WIRELESS without notice.
797 */
798
799 if (a[IFLA_WIRELESS])
800 return;
801
802 log(L_ERR "KIF: Malformed message received");
95616c82
OZ
803 return;
804 }
ad276157 805
95616c82 806 name = RTA_DATA(a[IFLA_IFNAME]);
acb04cfd 807 mtu = rta_get_u32(a[IFLA_MTU]);
95616c82 808
943478b0
OZ
809 if (a[IFLA_MASTER])
810 master = rta_get_u32(a[IFLA_MASTER]);
811
95616c82
OZ
812 ifi = if_find_by_index(i->ifi_index);
813 if (!new)
814 {
815 DBG("KIF: IF%d(%s) goes down\n", i->ifi_index, name);
816 if (!ifi)
817 return;
818
819 if_delete(ifi);
820 }
821 else
822 {
823 DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i->ifi_index, name, mtu, i->ifi_flags);
824 if (ifi && strncmp(ifi->name, name, sizeof(ifi->name)-1))
825 if_delete(ifi);
826
827 strncpy(f.name, name, sizeof(f.name)-1);
828 f.index = i->ifi_index;
829 f.mtu = mtu;
830
943478b0
OZ
831 f.master_index = master;
832 f.master = if_find_by_index(master);
833
95616c82
OZ
834 fl = i->ifi_flags;
835 if (fl & IFF_UP)
836 f.flags |= IF_ADMIN_UP;
837 if (fl & IFF_LOWER_UP)
838 f.flags |= IF_LINK_UP;
839 if (fl & IFF_LOOPBACK) /* Loopback */
840 f.flags |= IF_MULTIACCESS | IF_LOOPBACK | IF_IGNORE;
841 else if (fl & IFF_POINTOPOINT) /* PtP */
842 f.flags |= IF_MULTICAST;
843 else if (fl & IFF_BROADCAST) /* Broadcast */
844 f.flags |= IF_MULTIACCESS | IF_BROADCAST | IF_MULTICAST;
845 else
846 f.flags |= IF_MULTIACCESS; /* NBMA */
3216eb03 847
16a3254c
OZ
848 if (fl & IFF_MULTICAST)
849 f.flags |= IF_MULTICAST;
850
3216eb03
OZ
851 ifi = if_update(&f);
852
853 if (!scan)
854 if_end_partial_update(ifi);
95616c82
OZ
855 }
856}
857
858static void
9b136840 859nl_parse_addr4(struct ifaddrmsg *i, int scan, int new)
95616c82 860{
ad276157 861 struct rtattr *a[BIRD_IFA_MAX];
95616c82 862 struct iface *ifi;
e37d2e3e 863 u32 ifa_flags;
95616c82
OZ
864 int scope;
865
9b136840 866 if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a)))
95616c82 867 return;
ad276157 868
9b136840 869 if (!a[IFA_LOCAL])
ad276157 870 {
9b136840
JMM
871 log(L_ERR "KIF: Malformed message received (missing IFA_LOCAL)");
872 return;
ad276157 873 }
ad276157 874 if (!a[IFA_ADDRESS])
95616c82 875 {
ad276157 876 log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
95616c82
OZ
877 return;
878 }
879
880 ifi = if_find_by_index(i->ifa_index);
881 if (!ifi)
882 {
883 log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
884 return;
885 }
886
e37d2e3e
OZ
887 if (a[IFA_FLAGS])
888 ifa_flags = rta_get_u32(a[IFA_FLAGS]);
889 else
890 ifa_flags = i->ifa_flags;
891
9b136840 892 struct ifa ifa;
95616c82
OZ
893 bzero(&ifa, sizeof(ifa));
894 ifa.iface = ifi;
cc5b93f7 895 if (ifa_flags & IFA_F_SECONDARY)
95616c82
OZ
896 ifa.flags |= IA_SECONDARY;
897
9b136840
JMM
898 ifa.ip = rta_get_ipa(a[IFA_LOCAL]);
899
d7661fbe 900 if (i->ifa_prefixlen > IP4_MAX_PREFIX_LENGTH)
95616c82
OZ
901 {
902 log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
903 new = 0;
904 }
d7661fbe 905 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH)
95616c82 906 {
9b136840
JMM
907 ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
908 net_fill_ip4(&ifa.prefix, rta_get_ip4(a[IFA_ADDRESS]), i->ifa_prefixlen);
95616c82
OZ
909
910 /* It is either a host address or a peer address */
9b136840 911 if (ipa_equal(ifa.ip, ifa.brd))
95616c82
OZ
912 ifa.flags |= IA_HOST;
913 else
914 {
915 ifa.flags |= IA_PEER;
9b136840 916 ifa.opposite = ifa.brd;
95616c82
OZ
917 }
918 }
919 else
920 {
9b136840
JMM
921 net_fill_ip4(&ifa.prefix, ipa_to_ip4(ifa.ip), i->ifa_prefixlen);
922 net_normalize(&ifa.prefix);
923
d7661fbe 924 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 1)
95616c82
OZ
925 ifa.opposite = ipa_opposite_m1(ifa.ip);
926
d7661fbe 927 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 2)
95616c82
OZ
928 ifa.opposite = ipa_opposite_m2(ifa.ip);
929
930 if ((ifi->flags & IF_BROADCAST) && a[IFA_BROADCAST])
931 {
9b136840
JMM
932 ip4_addr xbrd = rta_get_ip4(a[IFA_BROADCAST]);
933 ip4_addr ybrd = ip4_or(ipa_to_ip4(ifa.ip), ip4_not(ip4_mkmask(i->ifa_prefixlen)));
934
935 if (ip4_equal(xbrd, net4_prefix(&ifa.prefix)) || ip4_equal(xbrd, ybrd))
936 ifa.brd = ipa_from_ip4(xbrd);
95616c82 937 else if (ifi->flags & IF_TMP_DOWN) /* Complain only during the first scan */
9b136840 938 {
e691d16a 939 log(L_ERR "KIF: Invalid broadcast address %I4 for %s", xbrd, ifi->name);
9b136840
JMM
940 ifa.brd = ipa_from_ip4(ybrd);
941 }
942 }
943 }
944
945 scope = ipa_classify(ifa.ip);
946 if (scope < 0)
947 {
948 log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
949 return;
950 }
951 ifa.scope = scope & IADDR_SCOPE_MASK;
952
953 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
954 ifi->index, ifi->name,
955 new ? "added" : "removed",
4659b2ae 956 ifa.ip, ifa.flags, &ifa.prefix, ifa.brd, ifa.opposite);
9b136840
JMM
957
958 if (new)
959 ifa_update(&ifa);
960 else
961 ifa_delete(&ifa);
962
963 if (!scan)
964 if_end_partial_update(ifi);
965}
966
967static void
968nl_parse_addr6(struct ifaddrmsg *i, int scan, int new)
969{
970 struct rtattr *a[BIRD_IFA_MAX];
971 struct iface *ifi;
cc5b93f7 972 u32 ifa_flags;
9b136840
JMM
973 int scope;
974
975 if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a)))
976 return;
977
978 if (!a[IFA_ADDRESS])
979 {
980 log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
981 return;
982 }
983
984 ifi = if_find_by_index(i->ifa_index);
985 if (!ifi)
986 {
987 log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
988 return;
989 }
990
cc5b93f7
OZ
991 if (a[IFA_FLAGS])
992 ifa_flags = rta_get_u32(a[IFA_FLAGS]);
993 else
994 ifa_flags = i->ifa_flags;
995
9b136840
JMM
996 struct ifa ifa;
997 bzero(&ifa, sizeof(ifa));
998 ifa.iface = ifi;
e37d2e3e 999 if (ifa_flags & IFA_F_SECONDARY)
9b136840
JMM
1000 ifa.flags |= IA_SECONDARY;
1001
e37d2e3e
OZ
1002 /* Ignore tentative addresses silently */
1003 if (ifa_flags & IFA_F_TENTATIVE)
1004 return;
9b136840 1005
95616c82 1006 /* IFA_LOCAL can be unset for IPv6 interfaces */
9b136840
JMM
1007 ifa.ip = rta_get_ipa(a[IFA_LOCAL] ? : a[IFA_ADDRESS]);
1008
d7661fbe 1009 if (i->ifa_prefixlen > IP6_MAX_PREFIX_LENGTH)
9b136840
JMM
1010 {
1011 log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
1012 new = 0;
1013 }
d7661fbe 1014 if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH)
9b136840
JMM
1015 {
1016 ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
1017 net_fill_ip6(&ifa.prefix, rta_get_ip6(a[IFA_ADDRESS]), i->ifa_prefixlen);
1018
1019 /* It is either a host address or a peer address */
1020 if (ipa_equal(ifa.ip, ifa.brd))
1021 ifa.flags |= IA_HOST;
1022 else
1023 {
1024 ifa.flags |= IA_PEER;
1025 ifa.opposite = ifa.brd;
95616c82 1026 }
9b136840
JMM
1027 }
1028 else
1029 {
1030 net_fill_ip6(&ifa.prefix, ipa_to_ip6(ifa.ip), i->ifa_prefixlen);
1031 net_normalize(&ifa.prefix);
1032
d7661fbe 1033 if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH - 1)
9b136840 1034 ifa.opposite = ipa_opposite_m1(ifa.ip);
95616c82
OZ
1035 }
1036
1037 scope = ipa_classify(ifa.ip);
1038 if (scope < 0)
1039 {
1040 log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
1041 return;
1042 }
1043 ifa.scope = scope & IADDR_SCOPE_MASK;
1044
9b136840 1045 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
95616c82
OZ
1046 ifi->index, ifi->name,
1047 new ? "added" : "removed",
4659b2ae 1048 ifa.ip, ifa.flags, &ifa.prefix, ifa.brd, ifa.opposite);
3216eb03 1049
95616c82
OZ
1050 if (new)
1051 ifa_update(&ifa);
1052 else
1053 ifa_delete(&ifa);
3216eb03
OZ
1054
1055 if (!scan)
1056 if_end_partial_update(ifi);
95616c82
OZ
1057}
1058
9b136840
JMM
/*
 * Process one RTM_NEWADDR / RTM_DELADDR message @h by dispatching it to the
 * IPv4 or IPv6 parser according to the address family in its header.
 * Messages that are too short or belong to another family are ignored.
 * @scan is passed through to the parsers.
 *
 * The parsers return void, so they are called as plain statements; a
 * 'return' with an expression is not valid ISO C in a void function.
 */
static void
nl_parse_addr(struct nlmsghdr *h, int scan)
{
  struct ifaddrmsg *i = nl_checkin(h, sizeof(*i));

  if (!i)
    return;

  int new = (h->nlmsg_type == RTM_NEWADDR);

  switch (i->ifa_family)
  {
  case AF_INET:
    nl_parse_addr4(i, scan, new);
    break;

  case AF_INET6:
    nl_parse_addr6(i, scan, new);
    break;

  default:
    /* Other address families are of no interest */
    break;
  }
}
1078
95616c82
OZ
1079void
1080kif_do_scan(struct kif_proto *p UNUSED)
1081{
1082 struct nlmsghdr *h;
1083
1084 if_start_update();
1085
86c3eea0 1086 nl_request_dump(AF_UNSPEC, RTM_GETLINK);
95616c82
OZ
1087 while (h = nl_get_scan())
1088 if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)
1089 nl_parse_link(h, 1);
1090 else
1091 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
29a64162 1092
943478b0
OZ
1093 /* Re-resolve master interface for slaves */
1094 struct iface *i;
1095 WALK_LIST(i, iface_list)
1096 if (i->master_index)
1097 {
1098 struct iface f = {
1099 .flags = i->flags,
1100 .mtu = i->mtu,
1101 .index = i->index,
1102 .master_index = i->master_index,
1103 .master = if_find_by_index(i->master_index)
1104 };
1105
1106 if (f.master != i->master)
1107 {
1108 memcpy(f.name, i->name, sizeof(f.name));
1109 if_update(&f);
1110 }
1111 }
1112
d7661fbe 1113 nl_request_dump(AF_INET, RTM_GETADDR);
95616c82
OZ
1114 while (h = nl_get_scan())
1115 if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
3216eb03 1116 nl_parse_addr(h, 1);
95616c82
OZ
1117 else
1118 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
29a64162 1119
d7661fbe
JMM
1120 nl_request_dump(AF_INET6, RTM_GETADDR);
1121 while (h = nl_get_scan())
1122 if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
1123 nl_parse_addr(h, 1);
1124 else
1125 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
29a64162 1126
95616c82
OZ
1127 if_end_update();
1128}
1129
1130/*
1131 * Routes
1132 */
1133
9ddbfbdd
JMM
1134static inline u32
1135krt_table_id(struct krt_proto *p)
1136{
1137 return KRT_CF->sys.table_id;
1138}
1139
/*
 * Hash table of krt_proto instances keyed by (address family, kernel table
 * id). In this file it is filled in krt_sys_start(), emptied in
 * krt_sys_shutdown() and queried in nl_parse_route() to find the protocol
 * responsible for a received route.
 */
1140static HASH(struct krt_proto) nl_table_map;
1141
29a64162
OZ
/* Key of an entry: the protocol's address family and its kernel table id */
1142#define RTH_KEY(p) p->af, krt_table_id(p)
/* Chaining link stored in the protocol itself */
1143#define RTH_NEXT(p) p->sys.hash_next
/* Two keys are equal when both the family and the table id match */
1144#define RTH_EQ(a1,i1,a2,i2) a1 == a2 && i1 == i2
/* Hash value: family XORed with the hashed table id */
1145#define RTH_FN(a,i) a ^ u32_hash(i)
9ddbfbdd
JMM
1146
/* Rehash function name and parameters consumed by the generic HASH_* macros
 * (NOTE(review): the meaning of the individual RTH_PARAMS values is defined
 * by lib/hash.h, not visible here) */
1147#define RTH_REHASH rth_rehash
1148#define RTH_PARAMS /8, *2, 2, 2, 6, 20
1149
1150HASH_DEFINE_REHASH_FN(RTH, struct krt_proto)
95616c82
OZ
1151
1152int
1153krt_capable(rte *e)
1154{
1155 rta *a = e->attrs;
1156
95616c82 1157 switch (a->dest)
62e64905 1158 {
4e276a89 1159 case RTD_UNICAST:
95616c82
OZ
1160 case RTD_BLACKHOLE:
1161 case RTD_UNREACHABLE:
1162 case RTD_PROHIBIT:
62e64905
OZ
1163 return 1;
1164
95616c82
OZ
1165 default:
1166 return 0;
62e64905 1167 }
95616c82
OZ
1168}
1169
1170static inline int
4e276a89 1171nh_bufsize(struct nexthop *nh)
95616c82
OZ
1172{
1173 int rv = 0;
1174 for (; nh != NULL; nh = nh->next)
9fdf9d29 1175 rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr)));
95616c82
OZ
1176 return rv;
1177}
1178
1179static int
13c0be19 1180nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh)
95616c82
OZ
1181{
1182 eattr *ea;
1183 net *net = e->net;
1184 rta *a = e->attrs;
13c0be19 1185 ea_list *eattrs = a->eattrs;
4e276a89 1186 int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(&(a->nh));
4adcb9df 1187 u32 priority = 0;
a8caff32 1188
95616c82
OZ
1189 struct {
1190 struct nlmsghdr h;
1191 struct rtmsg r;
a8caff32
JMM
1192 char buf[0];
1193 } *r;
1194
1195 int rsize = sizeof(*r) + bufsize;
1196 r = alloca(rsize);
95616c82 1197
cc5b93f7 1198 DBG("nl_send_route(%N,op=%x)\n", net->n.addr, op);
95616c82 1199
a8caff32
JMM
1200 bzero(&r->h, sizeof(r->h));
1201 bzero(&r->r, sizeof(r->r));
cc5b93f7 1202 r->h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE;
a8caff32 1203 r->h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
cc5b93f7 1204 r->h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK;
95616c82 1205
a8caff32
JMM
1206 r->r.rtm_family = p->af;
1207 r->r.rtm_dst_len = net_pxlen(net->n.addr);
1208 r->r.rtm_protocol = RTPROT_BIRD;
7074be22 1209 r->r.rtm_scope = RT_SCOPE_NOWHERE;
6b0f5f68 1210#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
1211 if (p->af == AF_MPLS)
1212 {
66acbc8d
OZ
1213 /*
1214 * Kernel MPLS code is a bit picky. We must:
1215 * 1) Always set RT_SCOPE_UNIVERSE and RTN_UNICAST (even for RTM_DELROUTE)
1216 * 2) Never use RTA_PRIORITY
1217 */
1218
d14f8c3c
JMM
1219 u32 label = net_mpls(net->n.addr);
1220 nl_add_attr_mpls(&r->h, rsize, RTA_DST, 1, &label);
66acbc8d
OZ
1221 r->r.rtm_scope = RT_SCOPE_UNIVERSE;
1222 r->r.rtm_type = RTN_UNICAST;
d14f8c3c
JMM
1223 }
1224 else
6b0f5f68 1225#endif
be17805c 1226 {
d14f8c3c 1227 nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(net->n.addr));
95616c82 1228
be17805c
OZ
1229 /* Add source address for IPv6 SADR routes */
1230 if (net->n.addr->type == NET_IP6_SADR)
1231 {
1232 net_addr_ip6_sadr *a = (void *) &net->n.addr;
1233 nl_add_attr_ip6(&r->h, rsize, RTA_SRC, a->src_prefix);
1234 r->r.rtm_src_len = a->src_pxlen;
1235 }
1236 }
1237
2feaa693
OZ
1238 /*
1239 * Strange behavior for RTM_DELROUTE:
1240 * 1) rtm_family is ignored in IPv6, works for IPv4
1241 * 2) not setting RTA_PRIORITY is different from setting default value (on IPv6)
1242 * 3) not setting RTA_PRIORITY is equivalent to setting 0, which is wildcard
1243 */
1244
9ddbfbdd 1245 if (krt_table_id(p) < 256)
a8caff32 1246 r->r.rtm_table = krt_table_id(p);
9ddbfbdd 1247 else
a8caff32 1248 nl_add_attr_u32(&r->h, rsize, RTA_TABLE, krt_table_id(p));
9ddbfbdd 1249
66acbc8d
OZ
1250 if (p->af == AF_MPLS)
1251 priority = 0;
1252 else if (a->source == RTS_DUMMY)
4adcb9df
OZ
1253 priority = e->u.krt.metric;
1254 else if (KRT_CF->sys.metric)
1255 priority = KRT_CF->sys.metric;
1256 else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC)))
1257 priority = ea->u.data;
78a2cc28 1258
4adcb9df 1259 if (priority)
d1b8fe93 1260 nl_add_attr_u32(&r->h, rsize, RTA_PRIORITY, priority);
78a2cc28 1261
2feaa693
OZ
1262 /* For route delete, we do not specify remaining route attributes */
1263 if (op == NL_OP_DELETE)
1264 goto dest;
78a2cc28 1265
6e75d0d2 1266 /* Default scope is LINK for device routes, UNIVERSE otherwise */
66acbc8d
OZ
1267 if (p->af == AF_MPLS)
1268 r->r.rtm_scope = RT_SCOPE_UNIVERSE;
1269 else if (ea = ea_find(eattrs, EA_KRT_SCOPE))
cc5b93f7 1270 r->r.rtm_scope = ea->u.data;
6e75d0d2 1271 else
4e276a89 1272 r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
95616c82
OZ
1273
1274 if (ea = ea_find(eattrs, EA_KRT_PREFSRC))
a8caff32 1275 nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data);
95616c82
OZ
1276
1277 if (ea = ea_find(eattrs, EA_KRT_REALM))
a8caff32 1278 nl_add_attr_u32(&r->h, rsize, RTA_FLOW, ea->u.data);
95616c82 1279
9fdf9d29
OZ
1280
1281 u32 metrics[KRT_METRICS_MAX];
1282 metrics[0] = 0;
1283
1284 struct ea_walk_state ews = { .eattrs = eattrs };
1285 while (ea = ea_walk(&ews, EA_KRT_METRICS, KRT_METRICS_MAX))
1286 {
1287 int id = ea->id - EA_KRT_METRICS;
1288 metrics[0] |= 1 << id;
1289 metrics[id] = ea->u.data;
1290 }
1291
1292 if (metrics[0])
a8caff32 1293 nl_add_metrics(&r->h, rsize, metrics, KRT_METRICS_MAX);
9fdf9d29
OZ
1294
1295
2feaa693 1296dest:
2feaa693 1297 switch (dest)
95616c82 1298 {
4e276a89 1299 case RTD_UNICAST:
a8caff32 1300 r->r.rtm_type = RTN_UNICAST;
4e276a89 1301 if (nh->next && !krt_ecmp6(p))
d14f8c3c 1302 nl_add_multipath(&r->h, rsize, nh, p->af);
4e276a89
JMM
1303 else
1304 {
1305 nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->iface->index);
d14f8c3c 1306 nl_add_nexthop(&r->h, rsize, nh, p->af);
a1f5e514
OZ
1307
1308 if (nh->flags & RNF_ONLINK)
1309 r->r.rtm_flags |= RTNH_F_ONLINK;
4e276a89 1310 }
95616c82
OZ
1311 break;
1312 case RTD_BLACKHOLE:
a8caff32 1313 r->r.rtm_type = RTN_BLACKHOLE;
95616c82
OZ
1314 break;
1315 case RTD_UNREACHABLE:
a8caff32 1316 r->r.rtm_type = RTN_UNREACHABLE;
95616c82
OZ
1317 break;
1318 case RTD_PROHIBIT:
a8caff32 1319 r->r.rtm_type = RTN_PROHIBIT;
95616c82 1320 break;
2feaa693
OZ
1321 case RTD_NONE:
1322 break;
95616c82
OZ
1323 default:
1324 bug("krt_capable inconsistent with nl_send_route");
1325 }
1326
2feaa693 1327 /* Ignore missing for DELETE */
cc5b93f7 1328 return nl_exchange(&r->h, (op == NL_OP_DELETE));
2feaa693
OZ
1329}
1330
1331static inline int
13c0be19 1332nl_add_rte(struct krt_proto *p, rte *e)
2feaa693
OZ
1333{
1334 rta *a = e->attrs;
1335 int err = 0;
1336
4e276a89 1337 if (krt_ecmp6(p) && a->nh.next)
2feaa693 1338 {
4e276a89 1339 struct nexthop *nh = &(a->nh);
2feaa693 1340
13c0be19 1341 err = nl_send_route(p, e, NL_OP_ADD, RTD_UNICAST, nh);
2feaa693
OZ
1342 if (err < 0)
1343 return err;
1344
1345 for (nh = nh->next; nh; nh = nh->next)
13c0be19 1346 err += nl_send_route(p, e, NL_OP_APPEND, RTD_UNICAST, nh);
2feaa693
OZ
1347
1348 return err;
1349 }
1350
13c0be19 1351 return nl_send_route(p, e, NL_OP_ADD, a->dest, &(a->nh));
2feaa693
OZ
1352}
1353
1354static inline int
13c0be19 1355nl_delete_rte(struct krt_proto *p, rte *e)
2feaa693
OZ
1356{
1357 int err = 0;
1358
1359 /* For IPv6, we just repeatedly request DELETE until we get error */
1360 do
13c0be19 1361 err = nl_send_route(p, e, NL_OP_DELETE, RTD_NONE, NULL);
2feaa693
OZ
1362 while (krt_ecmp6(p) && !err);
1363
1364 return err;
95616c82
OZ
1365}
1366
1367void
13c0be19 1368krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old)
95616c82
OZ
1369{
1370 int err = 0;
1371
1372 /*
2feaa693
OZ
1373 * We could use NL_OP_REPLACE, but route replace on Linux has some problems:
1374 *
1375 * 1) Does not check for matching rtm_protocol
1376 * 2) Has broken semantics for IPv6 ECMP
1377 * 3) Crashes some kernel version when used for IPv6 ECMP
1378 *
1379 * So we use NL_OP_DELETE and then NL_OP_ADD. We also do not trust the old
1380 * route value, so we do not try to optimize IPv6 ECMP reconfigurations.
95616c82
OZ
1381 */
1382
1383 if (old)
13c0be19 1384 nl_delete_rte(p, old);
95616c82
OZ
1385
1386 if (new)
13c0be19 1387 err = nl_add_rte(p, new);
95616c82
OZ
1388
1389 if (err < 0)
1390 n->n.flags |= KRF_SYNC_ERROR;
1391 else
1392 n->n.flags &= ~KRF_SYNC_ERROR;
1393}
1394
2feaa693
OZ
1395static int
1396nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint priority, uint krt_type)
1397{
1398 /* Route merging must be active */
1399 if (!s->merge)
1400 return 0;
1401
1402 /* Saved and new route must have same network, proto/table, and priority */
1403 if ((s->net != net) || (s->proto != p) || (s->krt_metric != priority))
1404 return 0;
1405
1406 /* Both must be regular unicast routes */
1407 if ((s->krt_type != RTN_UNICAST) || (krt_type != RTN_UNICAST))
1408 return 0;
1409
1410 return 1;
1411}
1412
1413static void
1414nl_announce_route(struct nl_parse_state *s)
1415{
1416 rte *e = rte_get_temp(s->attrs);
1417 e->net = s->net;
1418 e->u.krt.src = s->krt_src;
1419 e->u.krt.proto = s->krt_proto;
1420 e->u.krt.seen = 0;
1421 e->u.krt.best = 0;
1422 e->u.krt.metric = s->krt_metric;
1423
1424 if (s->scan)
1425 krt_got_route(s->proto, e);
1426 else
1427 krt_got_route_async(s->proto, e, s->new);
1428
1429 s->net = NULL;
1430 s->attrs = NULL;
1431 s->proto = NULL;
1432 lp_flush(s->pool);
1433}
1434
1435static inline void
1436nl_parse_begin(struct nl_parse_state *s, int scan, int merge)
1437{
1438 memset(s, 0, sizeof (struct nl_parse_state));
1439 s->pool = nl_linpool;
1440 s->scan = scan;
1441 s->merge = merge;
1442}
1443
1444static inline void
1445nl_parse_end(struct nl_parse_state *s)
1446{
1447 if (s->net)
1448 nl_announce_route(s);
1449}
1450
1451
95616c82
OZ
/*
 * Emit a debug message prefixed with "KRT: Ignoring route - " and return
 * from the enclosing function. It expands to a bare `return;`, so it can only
 * be used inside functions returning void.
 */
1452#define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
1453
1454static void
2feaa693 1455nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
95616c82
OZ
1456{
1457 struct krt_proto *p;
1458 struct rtmsg *i;
ad276157 1459 struct rtattr *a[BIRD_RTA_MAX];
95616c82
OZ
1460 int new = h->nlmsg_type == RTM_NEWROUTE;
1461
be17805c 1462 net_addr dst, src = {};
95616c82 1463 u32 oif = ~0;
29a64162 1464 u32 table_id;
2feaa693 1465 u32 priority = 0;
6e75d0d2 1466 u32 def_scope = RT_SCOPE_UNIVERSE;
be17805c 1467 int krt_src;
95616c82 1468
ad276157 1469 if (!(i = nl_checkin(h, sizeof(*i))))
95616c82 1470 return;
ad276157
JMM
1471
1472 switch (i->rtm_family)
95616c82 1473 {
29a64162
OZ
1474 case AF_INET:
1475 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want4, a, sizeof(a)))
1476 return;
1477
1478 if (a[RTA_DST])
1479 net_fill_ip4(&dst, rta_get_ip4(a[RTA_DST]), i->rtm_dst_len);
1480 else
1481 net_fill_ip4(&dst, IP4_NONE, 0);
1482 break;
1483
cc5b93f7
OZ
1484 case AF_INET6:
1485 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a)))
1486 return;
29a64162
OZ
1487
1488 if (a[RTA_DST])
1489 net_fill_ip6(&dst, rta_get_ip6(a[RTA_DST]), i->rtm_dst_len);
1490 else
1491 net_fill_ip6(&dst, IP6_NONE, 0);
be17805c
OZ
1492
1493 if (a[RTA_SRC])
1494 net_fill_ip6(&src, rta_get_ip6(a[RTA_SRC]), i->rtm_src_len);
1495 else
1496 net_fill_ip6(&src, IP6_NONE, 0);
29a64162
OZ
1497 break;
1498
6b0f5f68 1499#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
1500 case AF_MPLS:
1501 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want_mpls, a, sizeof(a)))
1502 return;
1503
ed610044
OZ
1504 if (!a[RTA_DST])
1505 SKIP("MPLS route without RTA_DST");
1506
1507 if (rta_get_mpls(a[RTA_DST], rta_mpls_stack) != 1)
1508 SKIP("MPLS route with multi-label RTA_DST");
1509
1510 net_fill_mpls(&dst, rta_mpls_stack[0]);
d14f8c3c 1511 break;
6b0f5f68 1512#endif
d14f8c3c 1513
29a64162
OZ
1514 default:
1515 return;
95616c82
OZ
1516 }
1517
95616c82 1518 if (a[RTA_OIF])
acb04cfd 1519 oif = rta_get_u32(a[RTA_OIF]);
95616c82 1520
9ddbfbdd 1521 if (a[RTA_TABLE])
29a64162 1522 table_id = rta_get_u32(a[RTA_TABLE]);
9ddbfbdd 1523 else
29a64162 1524 table_id = i->rtm_table;
9ddbfbdd 1525
29a64162
OZ
1526 /* Do we know this table? */
1527 p = HASH_FIND(nl_table_map, RTH, i->rtm_family, table_id);
95616c82 1528 if (!p)
4659b2ae 1529 SKIP("unknown table %u\n", table_id);
95616c82 1530
be17805c
OZ
1531 if (a[RTA_SRC] && (p->p.net_type != NET_IP6_SADR))
1532 SKIP("src prefix for non-SADR channel\n");
1533
95616c82
OZ
1534 if (a[RTA_IIF])
1535 SKIP("IIF set\n");
29a64162 1536
95616c82
OZ
1537 if (i->rtm_tos != 0) /* We don't support TOS */
1538 SKIP("TOS %02x\n", i->rtm_tos);
95616c82 1539
2feaa693 1540 if (s->scan && !new)
95616c82
OZ
1541 SKIP("RTM_DELROUTE in scan\n");
1542
2feaa693
OZ
1543 if (a[RTA_PRIORITY])
1544 priority = rta_get_u32(a[RTA_PRIORITY]);
1545
9b136840 1546 int c = net_classify(&dst);
95616c82
OZ
1547 if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
1548 SKIP("strange class/scope\n");
1549
95616c82
OZ
1550 switch (i->rtm_protocol)
1551 {
1552 case RTPROT_UNSPEC:
1553 SKIP("proto unspec\n");
1554
1555 case RTPROT_REDIRECT:
be17805c 1556 krt_src = KRT_SRC_REDIRECT;
95616c82
OZ
1557 break;
1558
1559 case RTPROT_KERNEL:
be17805c 1560 krt_src = KRT_SRC_KERNEL;
95616c82
OZ
1561 return;
1562
1563 case RTPROT_BIRD:
2feaa693 1564 if (!s->scan)
95616c82 1565 SKIP("echo\n");
be17805c 1566 krt_src = KRT_SRC_BIRD;
95616c82
OZ
1567 break;
1568
1569 case RTPROT_BOOT:
1570 default:
be17805c 1571 krt_src = KRT_SRC_ALIEN;
95616c82
OZ
1572 }
1573
be17805c
OZ
1574 net_addr *n = &dst;
1575 if (p->p.net_type == NET_IP6_SADR)
1576 {
1577 n = alloca(sizeof(net_addr_ip6_sadr));
1578 net_fill_ip6_sadr(n, net6_prefix(&dst), net6_pxlen(&dst),
1579 net6_prefix(&src), net6_pxlen(&src));
1580 }
1581
1582 net *net = net_get(p->p.main_channel->table, n);
95616c82 1583
2feaa693
OZ
1584 if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type))
1585 nl_announce_route(s);
1586
d14f8c3c 1587 rta *ra = lp_allocz(s->pool, RTA_MAX_SIZE);
2feaa693
OZ
1588 ra->src = p->p.main_source;
1589 ra->source = RTS_INHERIT;
1590 ra->scope = SCOPE_UNIVERSE;
95616c82
OZ
1591
1592 switch (i->rtm_type)
1593 {
1594 case RTN_UNICAST:
62e64905 1595 ra->dest = RTD_UNICAST;
95616c82 1596
98bb80a2 1597 if (a[RTA_MULTIPATH])
4ff15a75 1598 {
3e792350 1599 struct nexthop *nh = nl_parse_multipath(s, p, a[RTA_MULTIPATH], i->rtm_family);
4e276a89 1600 if (!nh)
95616c82 1601 {
fe9f1a6d 1602 log(L_ERR "KRT: Received strange multipath route %N", net->n.addr);
95616c82
OZ
1603 return;
1604 }
9fdf9d29 1605
62e64905 1606 ra->nh = *nh;
95616c82
OZ
1607 break;
1608 }
1609
4e276a89
JMM
1610 ra->nh.iface = if_find_by_index(oif);
1611 if (!ra->nh.iface)
95616c82 1612 {
fe9f1a6d 1613 log(L_ERR "KRT: Received route %N with unknown ifindex %u", net->n.addr, oif);
95616c82
OZ
1614 return;
1615 }
1616
6b0f5f68
MJM
1617 if ((i->rtm_family != AF_MPLS) && a[RTA_GATEWAY]
1618#ifdef HAVE_MPLS_KERNEL
1619 || (i->rtm_family == AF_MPLS) && a[RTA_VIA]
1620#endif
1621 )
95616c82 1622 {
6b0f5f68 1623#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
1624 if (i->rtm_family == AF_MPLS)
1625 ra->nh.gw = rta_get_via(a[RTA_VIA]);
1626 else
6b0f5f68 1627#endif
d14f8c3c 1628 ra->nh.gw = rta_get_ipa(a[RTA_GATEWAY]);
95616c82
OZ
1629
1630 /* Silently skip strange 6to4 routes */
0bf95f99 1631 const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96);
4e276a89 1632 if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->nh.gw, (net_addr *) &sit))
95616c82
OZ
1633 return;
1634
a1f5e514
OZ
1635 if (i->rtm_flags & RTNH_F_ONLINK)
1636 ra->nh.flags |= RNF_ONLINK;
1637
23c212e7 1638 neighbor *nbr;
586c1800
OZ
1639 nbr = neigh_find(&p->p, ra->nh.gw, ra->nh.iface,
1640 (ra->nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0);
23c212e7 1641 if (!nbr || (nbr->scope == SCOPE_HOST))
95616c82 1642 {
4e276a89
JMM
1643 log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr,
1644 ra->nh.gw);
95616c82
OZ
1645 return;
1646 }
1647 }
95616c82
OZ
1648
1649 break;
1650 case RTN_BLACKHOLE:
2feaa693 1651 ra->dest = RTD_BLACKHOLE;
95616c82
OZ
1652 break;
1653 case RTN_UNREACHABLE:
2feaa693 1654 ra->dest = RTD_UNREACHABLE;
95616c82
OZ
1655 break;
1656 case RTN_PROHIBIT:
2feaa693 1657 ra->dest = RTD_PROHIBIT;
95616c82
OZ
1658 break;
1659 /* FIXME: What about RTN_THROW? */
1660 default:
1661 SKIP("type %d\n", i->rtm_type);
1662 return;
1663 }
1664
6b0f5f68 1665#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
1666 int labels = 0;
1667 if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !ra->nh.next)
1668 labels = rta_get_mpls(a[RTA_NEWDST], ra->nh.label);
1669
1670 if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !ra->nh.next)
1671 {
1672 switch (rta_get_u16(a[RTA_ENCAP_TYPE]))
1673 {
1674 case LWTUNNEL_ENCAP_MPLS:
1675 {
1676 struct rtattr *enca[BIRD_RTA_MAX];
1677 nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
1678 nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
1679 labels = rta_get_mpls(enca[RTA_DST], ra->nh.label);
1680 break;
1681 }
1682 default:
1683 SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE]));
1684 break;
1685 }
1686 }
1687
1688 if (labels < 0)
1689 {
1690 log(L_WARN "KRT: Too long MPLS stack received, ignoring.");
1691 ra->nh.labels = 0;
1692 }
1693 else
1694 ra->nh.labels = labels;
6b0f5f68 1695#endif
d14f8c3c 1696
6e75d0d2
OZ
1697 if (i->rtm_scope != def_scope)
1698 {
1699 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1700 ea->next = ra->eattrs;
1701 ra->eattrs = ea;
1702 ea->flags = EALF_SORTED;
1703 ea->count = 1;
1704 ea->attrs[0].id = EA_KRT_SCOPE;
1705 ea->attrs[0].flags = 0;
1706 ea->attrs[0].type = EAF_TYPE_INT;
1707 ea->attrs[0].u.data = i->rtm_scope;
1708 }
95616c82
OZ
1709
1710 if (a[RTA_PREFSRC])
1711 {
9b136840 1712 ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]);
95616c82 1713
2feaa693
OZ
1714 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1715 ea->next = ra->eattrs;
1716 ra->eattrs = ea;
95616c82
OZ
1717 ea->flags = EALF_SORTED;
1718 ea->count = 1;
1719 ea->attrs[0].id = EA_KRT_PREFSRC;
1720 ea->attrs[0].flags = 0;
1721 ea->attrs[0].type = EAF_TYPE_IP_ADDRESS;
2feaa693 1722 ea->attrs[0].u.ptr = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps));
95616c82
OZ
1723 ea->attrs[0].u.ptr->length = sizeof(ps);
1724 memcpy(ea->attrs[0].u.ptr->data, &ps, sizeof(ps));
1725 }
1726
1727 if (a[RTA_FLOW])
1728 {
2feaa693
OZ
1729 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1730 ea->next = ra->eattrs;
1731 ra->eattrs = ea;
95616c82
OZ
1732 ea->flags = EALF_SORTED;
1733 ea->count = 1;
1734 ea->attrs[0].id = EA_KRT_REALM;
1735 ea->attrs[0].flags = 0;
1736 ea->attrs[0].type = EAF_TYPE_INT;
acb04cfd 1737 ea->attrs[0].u.data = rta_get_u32(a[RTA_FLOW]);
95616c82
OZ
1738 }
1739
9fdf9d29
OZ
1740 if (a[RTA_METRICS])
1741 {
1742 u32 metrics[KRT_METRICS_MAX];
2feaa693 1743 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr));
9fdf9d29
OZ
1744 int t, n = 0;
1745
1746 if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0)
1747 {
fe9f1a6d 1748 log(L_ERR "KRT: Received route %N with strange RTA_METRICS attribute", net->n.addr);
9fdf9d29
OZ
1749 return;
1750 }
1751
1752 for (t = 1; t < KRT_METRICS_MAX; t++)
1753 if (metrics[0] & (1 << t))
1754 {
ee7e2ffd 1755 ea->attrs[n].id = EA_CODE(PROTOCOL_KERNEL, KRT_METRICS_OFFSET + t);
9fdf9d29
OZ
1756 ea->attrs[n].flags = 0;
1757 ea->attrs[n].type = EAF_TYPE_INT; /* FIXME: Some are EAF_TYPE_BITFIELD */
1758 ea->attrs[n].u.data = metrics[t];
1759 n++;
1760 }
1761
1762 if (n > 0)
1763 {
2feaa693 1764 ea->next = ra->eattrs;
9fdf9d29
OZ
1765 ea->flags = EALF_SORTED;
1766 ea->count = n;
2feaa693 1767 ra->eattrs = ea;
9fdf9d29
OZ
1768 }
1769 }
1770
2feaa693
OZ
1771 /*
1772 * Ideally, now we would send the received route to the rest of kernel code.
98bb80a2
OZ
1773 * But IPv6 ECMP routes before 4.11 are sent as a sequence of routes, so we
1774 * postpone it and merge next hops until the end of the sequence. Note that
3e792350
OZ
1775 * when doing merging of next hops, we expect the new route to be unipath.
1776 * Otherwise, we ignore additional next hops in nexthop_insert().
2feaa693
OZ
1777 */
1778
1779 if (!s->net)
1780 {
1781 /* Store the new route */
1782 s->net = net;
1783 s->attrs = ra;
1784 s->proto = p;
1785 s->new = new;
be17805c 1786 s->krt_src = krt_src;
2feaa693
OZ
1787 s->krt_type = i->rtm_type;
1788 s->krt_proto = i->rtm_protocol;
1789 s->krt_metric = priority;
1790 }
95616c82 1791 else
2feaa693
OZ
1792 {
1793 /* Merge next hops with the stored route */
62e64905 1794 rta *oa = s->attrs;
2feaa693 1795
62e64905
OZ
1796 struct nexthop *nhs = &oa->nh;
1797 nexthop_insert(&nhs, &ra->nh);
1798
1799 /* Perhaps new nexthop is inserted at the first position */
1800 if (nhs == &ra->nh)
1801 {
1802 /* Swap rtas */
1803 s->attrs = ra;
1804
1805 /* Keep old eattrs */
1806 ra->eattrs = oa->eattrs;
1807 }
2feaa693 1808 }
95616c82
OZ
1809}
1810
1811void
1812krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
1813{
1814 struct nlmsghdr *h;
2feaa693 1815 struct nl_parse_state s;
95616c82 1816
cc5b93f7 1817 nl_parse_begin(&s, 1, 0);
d7661fbe 1818 nl_request_dump(AF_INET, RTM_GETROUTE);
95616c82
OZ
1819 while (h = nl_get_scan())
1820 if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
2feaa693 1821 nl_parse_route(&s, h);
95616c82
OZ
1822 else
1823 log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
cc5b93f7 1824 nl_parse_end(&s);
29a64162 1825
cc5b93f7 1826 nl_parse_begin(&s, 1, 1);
d7661fbe
JMM
1827 nl_request_dump(AF_INET6, RTM_GETROUTE);
1828 while (h = nl_get_scan())
1829 if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
cc5b93f7 1830 nl_parse_route(&s, h);
d7661fbe
JMM
1831 else
1832 log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
2feaa693 1833 nl_parse_end(&s);
d14f8c3c 1834
6b0f5f68 1835#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
1836 nl_parse_begin(&s, 1, 1);
1837 nl_request_dump(AF_MPLS, RTM_GETROUTE);
1838 while (h = nl_get_scan())
1839 if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
1840 nl_parse_route(&s, h);
1841 else
1842 log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
1843 nl_parse_end(&s);
6b0f5f68 1844#endif
95616c82
OZ
1845}
1846
1847/*
1848 * Asynchronous Netlink interface
1849 */
1850
/* Both are set up once in nl_open_async(); a non-NULL nl_async_sk marks the
 * asynchronous socket as already opened. */
1851static sock *nl_async_sk; /* BIRD socket for asynchronous notifications */
1852static byte *nl_async_rx_buffer; /* Receive buffer of NL_RX_SIZE bytes */
1853
1854static void
1855nl_async_msg(struct nlmsghdr *h)
1856{
2feaa693
OZ
1857 struct nl_parse_state s;
1858
95616c82
OZ
1859 switch (h->nlmsg_type)
1860 {
1861 case RTM_NEWROUTE:
1862 case RTM_DELROUTE:
1863 DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);
2feaa693
OZ
1864 nl_parse_begin(&s, 0, 0);
1865 nl_parse_route(&s, h);
1866 nl_parse_end(&s);
95616c82
OZ
1867 break;
1868 case RTM_NEWLINK:
1869 case RTM_DELLINK:
1870 DBG("KRT: Received async link notification (%d)\n", h->nlmsg_type);
1e4891e4
OZ
1871 if (kif_proto)
1872 nl_parse_link(h, 0);
95616c82
OZ
1873 break;
1874 case RTM_NEWADDR:
1875 case RTM_DELADDR:
1876 DBG("KRT: Received async address notification (%d)\n", h->nlmsg_type);
1e4891e4
OZ
1877 if (kif_proto)
1878 nl_parse_addr(h, 0);
95616c82
OZ
1879 break;
1880 default:
1881 DBG("KRT: Received unknown async notification (%d)\n", h->nlmsg_type);
1882 }
1883}
1884
1885static int
3e236955 1886nl_async_hook(sock *sk, uint size UNUSED)
95616c82
OZ
1887{
1888 struct iovec iov = { nl_async_rx_buffer, NL_RX_SIZE };
1889 struct sockaddr_nl sa;
31e9e101
ST
1890 struct msghdr m = {
1891 .msg_name = &sa,
1892 .msg_namelen = sizeof(sa),
1893 .msg_iov = &iov,
1894 .msg_iovlen = 1,
1895 };
95616c82
OZ
1896 struct nlmsghdr *h;
1897 int x;
ae80a2de 1898 uint len;
95616c82
OZ
1899
1900 x = recvmsg(sk->fd, &m, 0);
1901 if (x < 0)
1902 {
1903 if (errno == ENOBUFS)
1904 {
1905 /*
1906 * Netlink reports some packets have been thrown away.
1907 * One day we might react to it by asking for route table
1908 * scan in near future.
1909 */
2c33da50 1910 log(L_WARN "Kernel dropped some netlink messages, will resync on next scan.");
95616c82
OZ
1911 return 1; /* More data are likely to be ready */
1912 }
1913 else if (errno != EWOULDBLOCK)
1914 log(L_ERR "Netlink recvmsg: %m");
1915 return 0;
1916 }
1917 if (sa.nl_pid) /* It isn't from the kernel */
1918 {
1919 DBG("Non-kernel packet\n");
1920 return 1;
1921 }
1922 h = (void *) nl_async_rx_buffer;
1923 len = x;
1924 if (m.msg_flags & MSG_TRUNC)
1925 {
1926 log(L_WARN "Netlink got truncated asynchronous message");
1927 return 1;
1928 }
1929 while (NLMSG_OK(h, len))
1930 {
1931 nl_async_msg(h);
1932 h = NLMSG_NEXT(h, len);
1933 }
1934 if (len)
1935 log(L_WARN "nl_async_hook: Found packet remnant of size %d", len);
1936 return 1;
1937}
1938
ccd2a3ed
JMM
1939static void
1940nl_async_err_hook(sock *sk, int e UNUSED)
1941{
1942 nl_async_hook(sk, 0);
1943}
1944
95616c82
OZ
1945static void
1946nl_open_async(void)
1947{
1948 sock *sk;
1949 struct sockaddr_nl sa;
1950 int fd;
95616c82 1951
f83ce94d 1952 if (nl_async_sk)
95616c82 1953 return;
95616c82
OZ
1954
1955 DBG("KRT: Opening async netlink socket\n");
1956
1957 fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
1958 if (fd < 0)
1959 {
1960 log(L_ERR "Unable to open asynchronous rtnetlink socket: %m");
1961 return;
1962 }
1963
1964 bzero(&sa, sizeof(sa));
1965 sa.nl_family = AF_NETLINK;
29a64162
OZ
1966 sa.nl_groups = RTMGRP_LINK |
1967 RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE |
1968 RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE;
1969
95616c82
OZ
1970 if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
1971 {
1972 log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m");
f83ce94d 1973 close(fd);
95616c82
OZ
1974 return;
1975 }
1976
f83ce94d
OZ
1977 nl_async_rx_buffer = xmalloc(NL_RX_SIZE);
1978
95616c82
OZ
1979 sk = nl_async_sk = sk_new(krt_pool);
1980 sk->type = SK_MAGIC;
1981 sk->rx_hook = nl_async_hook;
ccd2a3ed 1982 sk->err_hook = nl_async_err_hook;
95616c82 1983 sk->fd = fd;
05476c4d 1984 if (sk_open(sk) < 0)
95616c82 1985 bug("Netlink: sk_open failed");
95616c82
OZ
1986}
1987
9ddbfbdd 1988
95616c82
OZ
1989/*
1990 * Interface to the UNIX krt module
1991 */
1992
95616c82 1993void
9ddbfbdd
JMM
1994krt_sys_io_init(void)
1995{
05d47bd5 1996 nl_linpool = lp_new_default(krt_pool);
9ddbfbdd
JMM
1997 HASH_INIT(nl_table_map, krt_pool, 6);
1998}
1999
2000int
c6964c30 2001krt_sys_start(struct krt_proto *p)
95616c82 2002{
29a64162 2003 struct krt_proto *old = HASH_FIND(nl_table_map, RTH, p->af, krt_table_id(p));
9ddbfbdd
JMM
2004
2005 if (old)
2006 {
2007 log(L_ERR "%s: Kernel table %u already registered by %s",
2008 p->p.name, krt_table_id(p), old->p.name);
2009 return 0;
2010 }
2011
2012 HASH_INSERT2(nl_table_map, RTH, krt_pool, p);
c6964c30
OZ
2013
2014 nl_open();
2015 nl_open_async();
9ddbfbdd
JMM
2016
2017 return 1;
95616c82
OZ
2018}
2019
2020void
9ddbfbdd 2021krt_sys_shutdown(struct krt_proto *p)
95616c82 2022{
9ddbfbdd 2023 HASH_REMOVE2(nl_table_map, RTH, krt_pool, p);
95616c82
OZ
2024}
2025
2026int
2027krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
2028{
4adcb9df 2029 return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric);
95616c82
OZ
2030}
2031
95616c82
OZ
2032void
2033krt_sys_init_config(struct krt_config *cf)
2034{
2035 cf->sys.table_id = RT_TABLE_MAIN;
bff21441 2036 cf->sys.metric = 32;
95616c82
OZ
2037}
2038
2039void
2040krt_sys_copy_config(struct krt_config *d, struct krt_config *s)
2041{
2042 d->sys.table_id = s->sys.table_id;
4adcb9df 2043 d->sys.metric = s->sys.metric;
95616c82
OZ
2044}
2045
9fdf9d29
OZ
/*
 * Names of the kernel route metrics, indexed by metric id. Slot 0 has no
 * name; krt_sys_get_attr() uses entries 1 and above as attribute names and
 * entries 2 and above as bit names of the "lock" bitfield.
 */
2046static const char *krt_metrics_names[KRT_METRICS_MAX] = {
2047 NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss",
2048 "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack"
2049};
2050
/* Bit names of the "features" bitfield, indexed by bit; NULL marks a bit
 * without a name. */
2051static const char *krt_features_names[KRT_FEATURES_MAX] = {
2052 "ecn", NULL, NULL, "allfrag"
2053};
2054
2055int
2056krt_sys_get_attr(eattr *a, byte *buf, int buflen UNUSED)
2057{
2058 switch (a->id)
2059 {
2060 case EA_KRT_PREFSRC:
2061 bsprintf(buf, "prefsrc");
2062 return GA_NAME;
2063
2064 case EA_KRT_REALM:
2065 bsprintf(buf, "realm");
2066 return GA_NAME;
2067
6e75d0d2
OZ
2068 case EA_KRT_SCOPE:
2069 bsprintf(buf, "scope");
2070 return GA_NAME;
2071
9fdf9d29
OZ
2072 case EA_KRT_LOCK:
2073 buf += bsprintf(buf, "lock:");
2074 ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX);
2075 return GA_FULL;
2076
2077 case EA_KRT_FEATURES:
2078 buf += bsprintf(buf, "features:");
2079 ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX);
2080 return GA_FULL;
2081
2082 default:;
2083 int id = (int)EA_ID(a->id) - KRT_METRICS_OFFSET;
2084 if (id > 0 && id < KRT_METRICS_MAX)
2085 {
2086 bsprintf(buf, "%s", krt_metrics_names[id]);
2087 return GA_NAME;
2088 }
2089
2090 return GA_UNKNOWN;
2091 }
2092}
2093
95616c82
OZ
2094
2095
2096void
2097kif_sys_start(struct kif_proto *p UNUSED)
2098{
2099 nl_open();
2100 nl_open_async();
2101}
2102
2103void
2104kif_sys_shutdown(struct kif_proto *p UNUSED)
2105{
2106}
153f02da
OZ
2107
2108int
2109kif_update_sysdep_addr(struct iface *i UNUSED)
2110{
2111 return 0;
2112}