]> git.ipfire.org Git - thirdparty/bird.git/blame - sysdep/linux/netlink.c
VPN4 and VPN6 literals
[thirdparty/bird.git] / sysdep / linux / netlink.c
CommitLineData
95616c82
OZ
1/*
2 * BIRD -- Linux Netlink Interface
3 *
4 * (c) 1999--2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
a8caff32 9#include <alloca.h>
95616c82 10#include <stdio.h>
f83ce94d 11#include <unistd.h>
95616c82
OZ
12#include <fcntl.h>
13#include <sys/socket.h>
14#include <sys/uio.h>
15#include <errno.h>
16
17#undef LOCAL_DEBUG
18
19#include "nest/bird.h"
20#include "nest/route.h"
21#include "nest/protocol.h"
22#include "nest/iface.h"
4e276a89 23#include "lib/alloca.h"
7152e5ef
MM
24#include "sysdep/unix/timer.h"
25#include "sysdep/unix/unix.h"
26#include "sysdep/unix/krt.h"
95616c82
OZ
27#include "lib/socket.h"
28#include "lib/string.h"
9ddbfbdd 29#include "lib/hash.h"
95616c82
OZ
30#include "conf/conf.h"
31
32#include <asm/types.h>
33#include <linux/if.h>
d14f8c3c 34#include <linux/lwtunnel.h>
95616c82
OZ
35#include <linux/netlink.h>
36#include <linux/rtnetlink.h>
37
9ddbfbdd 38
95616c82
OZ
39#ifndef MSG_TRUNC /* Hack: Several versions of glibc miss this one :( */
40#define MSG_TRUNC 0x20
41#endif
42
a08a81c6
OZ
43#ifndef IFA_FLAGS
44#define IFA_FLAGS 8
45#endif
46
95616c82
OZ
47#ifndef IFF_LOWER_UP
48#define IFF_LOWER_UP 0x10000
49#endif
50
9ddbfbdd
MM
51#ifndef RTA_TABLE
52#define RTA_TABLE 15
53#endif
54
d14f8c3c
MM
55#ifndef RTA_VIA
56#define RTA_VIA 18
57#endif
58
59#ifndef RTA_NEWDST
60#define RTA_NEWDST 19
61#endif
62
63#ifndef RTA_ENCAP_TYPE
64#define RTA_ENCAP_TYPE 21
65#endif
66
67#ifndef RTA_ENCAP
68#define RTA_ENCAP 22
69#endif
9ddbfbdd 70
cc5b93f7 71#define krt_ecmp6(p) ((p)->af == AF_INET6)
2feaa693
OZ
72
73/*
74 * Structure nl_parse_state keeps state of received route processing. Ideally,
75 * we could just independently parse received Netlink messages and immediately
76 * propagate received routes to the rest of BIRD, but Linux kernel represents
77 * and announces IPv6 ECMP routes not as one route with multiple next hops (like
78 * RTA_MULTIPATH in IPv4 ECMP), but as a set of routes with the same prefix.
79 *
80 * Therefore, BIRD keeps currently processed route in nl_parse_state structure
81 * and postpones its propagation until we expect it to be final; i.e., when
82 * non-matching route is received or when the scan ends. When another matching
83 * route is received, it is merged with the already processed route to form an
84 * ECMP route. Note that merging is done only for IPv6 (merge == 1), but the
85 * postponing is done in both cases (for simplicity). All IPv4 routes are just
86 * considered non-matching.
87 *
88 * This is ignored for asynchronous notifications (every notification is handled
89 * as a separate route). It is not an issue for our routes, as we ignore such
90 * notifications anyways. But importing alien IPv6 ECMP routes does not work
91 * properly.
92 */
93
94struct nl_parse_state
95{
96 struct linpool *pool;
97 int scan;
98 int merge;
99
100 net *net;
101 rta *attrs;
102 struct krt_proto *proto;
103 s8 new;
104 s8 krt_src;
105 u8 krt_type;
106 u8 krt_proto;
107 u32 krt_metric;
108};
109
95616c82
OZ
110/*
111 * Synchronous Netlink interface
112 */
113
114struct nl_sock
115{
116 int fd;
117 u32 seq;
118 byte *rx_buffer; /* Receive buffer */
119 struct nlmsghdr *last_hdr; /* Recently received packet */
ae80a2de 120 uint last_size;
95616c82
OZ
121};
122
123#define NL_RX_SIZE 8192
124
2feaa693
OZ
125#define NL_OP_DELETE 0
126#define NL_OP_ADD (NLM_F_CREATE|NLM_F_EXCL)
127#define NL_OP_REPLACE (NLM_F_CREATE|NLM_F_REPLACE)
128#define NL_OP_APPEND (NLM_F_CREATE|NLM_F_APPEND)
129
130static linpool *nl_linpool;
131
95616c82
OZ
132static struct nl_sock nl_scan = {.fd = -1}; /* Netlink socket for synchronous scan */
133static struct nl_sock nl_req = {.fd = -1}; /* Netlink socket for requests */
134
135static void
136nl_open_sock(struct nl_sock *nl)
137{
138 if (nl->fd < 0)
139 {
140 nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
141 if (nl->fd < 0)
142 die("Unable to open rtnetlink socket: %m");
143 nl->seq = now;
144 nl->rx_buffer = xmalloc(NL_RX_SIZE);
145 nl->last_hdr = NULL;
146 nl->last_size = 0;
147 }
148}
149
150static void
151nl_open(void)
152{
153 nl_open_sock(&nl_scan);
154 nl_open_sock(&nl_req);
155}
156
157static void
158nl_send(struct nl_sock *nl, struct nlmsghdr *nh)
159{
160 struct sockaddr_nl sa;
161
162 memset(&sa, 0, sizeof(sa));
163 sa.nl_family = AF_NETLINK;
164 nh->nlmsg_pid = 0;
165 nh->nlmsg_seq = ++(nl->seq);
166 if (sendto(nl->fd, nh, nh->nlmsg_len, 0, (struct sockaddr *)&sa, sizeof(sa)) < 0)
167 die("rtnetlink sendto: %m");
168 nl->last_hdr = NULL;
169}
170
171static void
86c3eea0 172nl_request_dump(int af, int cmd)
95616c82
OZ
173{
174 struct {
175 struct nlmsghdr nh;
176 struct rtgenmsg g;
641172c6
OZ
177 } req = {
178 .nh.nlmsg_type = cmd,
179 .nh.nlmsg_len = sizeof(req),
180 .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
181 .g.rtgen_family = af
182 };
95616c82
OZ
183 nl_send(&nl_scan, &req.nh);
184}
185
186static struct nlmsghdr *
187nl_get_reply(struct nl_sock *nl)
188{
189 for(;;)
190 {
191 if (!nl->last_hdr)
192 {
193 struct iovec iov = { nl->rx_buffer, NL_RX_SIZE };
194 struct sockaddr_nl sa;
31e9e101
ST
195 struct msghdr m = {
196 .msg_name = &sa,
197 .msg_namelen = sizeof(sa),
198 .msg_iov = &iov,
199 .msg_iovlen = 1,
200 };
95616c82
OZ
201 int x = recvmsg(nl->fd, &m, 0);
202 if (x < 0)
203 die("nl_get_reply: %m");
204 if (sa.nl_pid) /* It isn't from the kernel */
205 {
206 DBG("Non-kernel packet\n");
207 continue;
208 }
209 nl->last_size = x;
210 nl->last_hdr = (void *) nl->rx_buffer;
211 if (m.msg_flags & MSG_TRUNC)
212 bug("nl_get_reply: got truncated reply which should be impossible");
213 }
214 if (NLMSG_OK(nl->last_hdr, nl->last_size))
215 {
216 struct nlmsghdr *h = nl->last_hdr;
217 nl->last_hdr = NLMSG_NEXT(h, nl->last_size);
218 if (h->nlmsg_seq != nl->seq)
219 {
220 log(L_WARN "nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)",
221 h->nlmsg_seq, nl->seq);
222 continue;
223 }
224 return h;
225 }
226 if (nl->last_size)
227 log(L_WARN "nl_get_reply: Found packet remnant of size %d", nl->last_size);
228 nl->last_hdr = NULL;
229 }
230}
231
1123e707 232static struct tbf rl_netlink_err = TBF_DEFAULT_LOG_LIMITS;
95616c82
OZ
233
234static int
2feaa693 235nl_error(struct nlmsghdr *h, int ignore_esrch)
95616c82
OZ
236{
237 struct nlmsgerr *e;
238 int ec;
239
240 if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr)))
241 {
242 log(L_WARN "Netlink: Truncated error message received");
243 return ENOBUFS;
244 }
245 e = (struct nlmsgerr *) NLMSG_DATA(h);
246 ec = -e->error;
2feaa693 247 if (ec && !(ignore_esrch && (ec == ESRCH)))
95616c82
OZ
248 log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec));
249 return ec;
250}
251
252static struct nlmsghdr *
253nl_get_scan(void)
254{
255 struct nlmsghdr *h = nl_get_reply(&nl_scan);
256
257 if (h->nlmsg_type == NLMSG_DONE)
258 return NULL;
259 if (h->nlmsg_type == NLMSG_ERROR)
260 {
2feaa693 261 nl_error(h, 0);
95616c82
OZ
262 return NULL;
263 }
264 return h;
265}
266
267static int
2feaa693 268nl_exchange(struct nlmsghdr *pkt, int ignore_esrch)
95616c82
OZ
269{
270 struct nlmsghdr *h;
271
272 nl_send(&nl_req, pkt);
273 for(;;)
274 {
275 h = nl_get_reply(&nl_req);
276 if (h->nlmsg_type == NLMSG_ERROR)
277 break;
278 log(L_WARN "nl_exchange: Unexpected reply received");
279 }
2feaa693 280 return nl_error(h, ignore_esrch) ? -1 : 0;
95616c82
OZ
281}
282
283/*
284 * Netlink attributes
285 */
286
287static int nl_attr_len;
288
289static void *
290nl_checkin(struct nlmsghdr *h, int lsize)
291{
292 nl_attr_len = h->nlmsg_len - NLMSG_LENGTH(lsize);
293 if (nl_attr_len < 0)
294 {
295 log(L_ERR "nl_checkin: underrun by %d bytes", -nl_attr_len);
296 return NULL;
297 }
298 return NLMSG_DATA(h);
299}
300
ad276157
MM
301struct nl_want_attrs {
302 u8 defined:1;
303 u8 checksize:1;
304 u8 size;
305};
306
307
308#define BIRD_IFLA_MAX (IFLA_WIRELESS+1)
309
310static struct nl_want_attrs ifla_attr_want[BIRD_IFLA_MAX] = {
311 [IFLA_IFNAME] = { 1, 0, 0 },
312 [IFLA_MTU] = { 1, 1, sizeof(u32) },
313 [IFLA_WIRELESS] = { 1, 0, 0 },
314};
315
29a64162 316
e37d2e3e 317#define BIRD_IFA_MAX (IFA_FLAGS+1)
ad276157 318
ad276157
MM
319static struct nl_want_attrs ifa_attr_want4[BIRD_IFA_MAX] = {
320 [IFA_ADDRESS] = { 1, 1, sizeof(ip4_addr) },
321 [IFA_LOCAL] = { 1, 1, sizeof(ip4_addr) },
322 [IFA_BROADCAST] = { 1, 1, sizeof(ip4_addr) },
323};
29a64162 324
ad276157
MM
325static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = {
326 [IFA_ADDRESS] = { 1, 1, sizeof(ip6_addr) },
327 [IFA_LOCAL] = { 1, 1, sizeof(ip6_addr) },
e37d2e3e 328 [IFA_FLAGS] = { 1, 1, sizeof(u32) },
ad276157 329};
29a64162 330
ad276157 331
d14f8c3c 332#define BIRD_RTA_MAX (RTA_ENCAP+1)
ad276157 333
4e276a89 334static struct nl_want_attrs nexthop_attr_want4[BIRD_RTA_MAX] = {
ad276157 335 [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
d14f8c3c
MM
336 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
337 [RTA_ENCAP] = { 1, 0, 0 },
338};
339
340static struct nl_want_attrs encap_mpls_want[BIRD_RTA_MAX] = {
341 [RTA_DST] = { 1, 0, 0 },
ad276157
MM
342};
343
ad276157
MM
344static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = {
345 [RTA_DST] = { 1, 1, sizeof(ip4_addr) },
346 [RTA_OIF] = { 1, 1, sizeof(u32) },
347 [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
348 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
349 [RTA_PREFSRC] = { 1, 1, sizeof(ip4_addr) },
350 [RTA_METRICS] = { 1, 0, 0 },
351 [RTA_MULTIPATH] = { 1, 0, 0 },
352 [RTA_FLOW] = { 1, 1, sizeof(u32) },
353 [RTA_TABLE] = { 1, 1, sizeof(u32) },
d14f8c3c
MM
354 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
355 [RTA_ENCAP] = { 1, 0, 0 },
ad276157 356};
29a64162 357
ad276157
MM
358static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = {
359 [RTA_DST] = { 1, 1, sizeof(ip6_addr) },
360 [RTA_IIF] = { 1, 1, sizeof(u32) },
361 [RTA_OIF] = { 1, 1, sizeof(u32) },
362 [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) },
363 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
364 [RTA_PREFSRC] = { 1, 1, sizeof(ip6_addr) },
365 [RTA_METRICS] = { 1, 0, 0 },
366 [RTA_FLOW] = { 1, 1, sizeof(u32) },
367 [RTA_TABLE] = { 1, 1, sizeof(u32) },
d14f8c3c
MM
368 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
369 [RTA_ENCAP] = { 1, 0, 0 },
370};
371
372static struct nl_want_attrs rtm_attr_want_mpls[BIRD_RTA_MAX] = {
373 [RTA_DST] = { 1, 1, sizeof(u32) },
374 [RTA_IIF] = { 1, 1, sizeof(u32) },
375 [RTA_OIF] = { 1, 1, sizeof(u32) },
376 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
377 [RTA_METRICS] = { 1, 0, 0 },
378 [RTA_FLOW] = { 1, 1, sizeof(u32) },
379 [RTA_TABLE] = { 1, 1, sizeof(u32) },
380 [RTA_VIA] = { 1, 0, 0 },
381 [RTA_NEWDST] = { 1, 0, 0 },
ad276157 382};
ad276157
MM
383
384
95616c82 385static int
ad276157 386nl_parse_attrs(struct rtattr *a, struct nl_want_attrs *want, struct rtattr **k, int ksize)
95616c82
OZ
387{
388 int max = ksize / sizeof(struct rtattr *);
389 bzero(k, ksize);
ad276157
MM
390
391 for ( ; RTA_OK(a, nl_attr_len); a = RTA_NEXT(a, nl_attr_len))
95616c82 392 {
ad276157
MM
393 if ((a->rta_type >= max) || !want[a->rta_type].defined)
394 continue;
395
396 if (want[a->rta_type].checksize && (RTA_PAYLOAD(a) != want[a->rta_type].size))
397 {
9b136840 398 log(L_ERR "nl_parse_attrs: Malformed attribute received");
ad276157
MM
399 return 0;
400 }
401
402 k[a->rta_type] = a;
95616c82 403 }
ad276157 404
95616c82
OZ
405 if (nl_attr_len)
406 {
407 log(L_ERR "nl_parse_attrs: remnant of size %d", nl_attr_len);
408 return 0;
409 }
ad276157
MM
410
411 return 1;
95616c82
OZ
412}
413
d14f8c3c
MM
414static inline u16 rta_get_u16(struct rtattr *a)
415{ return *(u16 *) RTA_DATA(a); }
416
fce764f9 417static inline u32 rta_get_u32(struct rtattr *a)
acb04cfd
OZ
418{ return *(u32 *) RTA_DATA(a); }
419
420static inline ip4_addr rta_get_ip4(struct rtattr *a)
421{ return ip4_ntoh(*(ip4_addr *) RTA_DATA(a)); }
422
423static inline ip6_addr rta_get_ip6(struct rtattr *a)
424{ return ip6_ntoh(*(ip6_addr *) RTA_DATA(a)); }
425
9b136840
MM
426static inline ip_addr rta_get_ipa(struct rtattr *a)
427{
428 if (RTA_PAYLOAD(a) == sizeof(ip4_addr))
429 return ipa_from_ip4(rta_get_ip4(a));
430 else
431 return ipa_from_ip6(rta_get_ip6(a));
432}
acb04cfd 433
d14f8c3c
MM
434static inline ip_addr rta_get_via(struct rtattr *a)
435{
436 struct rtvia *v = RTA_DATA(a);
437 switch(v->rtvia_family) {
438 case AF_INET: return ipa_from_ip4(ip4_ntoh(*(ip4_addr *) v->rtvia_addr));
439 case AF_INET6: return ipa_from_ip6(ip6_ntoh(*(ip6_addr *) v->rtvia_addr));
440 }
441 return IPA_NONE;
442}
443
444static u32 rta_mpls_stack[MPLS_MAX_LABEL_STACK];
445static inline int rta_get_mpls(struct rtattr *a, u32 *stack)
446{
447 if (RTA_PAYLOAD(a) % 4)
448 log(L_WARN "KRT: Strange length of received MPLS stack: %u", RTA_PAYLOAD(a));
449
450 return mpls_get(RTA_DATA(a), RTA_PAYLOAD(a) & ~0x3, stack);
451}
452
9fdf9d29
OZ
453struct rtattr *
454nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint dlen)
95616c82 455{
9fdf9d29
OZ
456 uint pos = NLMSG_ALIGN(h->nlmsg_len);
457 uint len = RTA_LENGTH(dlen);
95616c82
OZ
458
459 if (pos + len > bufsize)
460 bug("nl_add_attr: packet buffer overflow");
9fdf9d29
OZ
461
462 struct rtattr *a = (struct rtattr *)((char *)h + pos);
95616c82
OZ
463 a->rta_type = code;
464 a->rta_len = len;
465 h->nlmsg_len = pos + len;
9fdf9d29
OZ
466
467 if (dlen > 0)
468 memcpy(RTA_DATA(a), data, dlen);
469
470 return a;
95616c82
OZ
471}
472
d14f8c3c
MM
473static inline struct rtattr *
474nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code)
475{
476 return nl_add_attr(h, bufsize, code, NULL, 0);
477}
478
479static inline void
480nl_close_attr(struct nlmsghdr *h, struct rtattr *a)
481{
482 a->rta_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)a;
483}
484
485static inline void
486nl_add_attr_u16(struct nlmsghdr *h, uint bufsize, int code, u16 data)
487{
488 nl_add_attr(h, bufsize, code, &data, 2);
489}
490
95616c82 491static inline void
29a64162 492nl_add_attr_u32(struct nlmsghdr *h, uint bufsize, int code, u32 data)
95616c82
OZ
493{
494 nl_add_attr(h, bufsize, code, &data, 4);
495}
496
497static inline void
29a64162 498nl_add_attr_ip4(struct nlmsghdr *h, uint bufsize, int code, ip4_addr ip4)
95616c82 499{
29a64162
OZ
500 ip4 = ip4_hton(ip4);
501 nl_add_attr(h, bufsize, code, &ip4, sizeof(ip4));
502}
503
504static inline void
505nl_add_attr_ip6(struct nlmsghdr *h, uint bufsize, int code, ip6_addr ip6)
506{
507 ip6 = ip6_hton(ip6);
508 nl_add_attr(h, bufsize, code, &ip6, sizeof(ip6));
509}
510
511static inline void
512nl_add_attr_ipa(struct nlmsghdr *h, uint bufsize, int code, ip_addr ipa)
513{
514 if (ipa_is_ip4(ipa))
515 nl_add_attr_ip4(h, bufsize, code, ipa_to_ip4(ipa));
9b136840 516 else
29a64162 517 nl_add_attr_ip6(h, bufsize, code, ipa_to_ip6(ipa));
95616c82
OZ
518}
519
d14f8c3c
MM
520static inline void
521nl_add_attr_mpls(struct nlmsghdr *h, uint bufsize, int code, int len, u32 *stack)
9fdf9d29 522{
d14f8c3c
MM
523 char buf[len*4];
524 mpls_put(buf, len, stack);
525 nl_add_attr(h, bufsize, code, buf, len*4);
9fdf9d29 526}
95616c82
OZ
527
528static inline void
d14f8c3c 529nl_add_attr_mpls_encap(struct nlmsghdr *h, uint bufsize, int len, u32 *stack)
95616c82 530{
d14f8c3c
MM
531 nl_add_attr_u16(h, bufsize, RTA_ENCAP_TYPE, LWTUNNEL_ENCAP_MPLS);
532
533 struct rtattr *nest = nl_open_attr(h, bufsize, RTA_ENCAP);
534 nl_add_attr_mpls(h, bufsize, RTA_DST, len, stack);
535 nl_close_attr(h, nest);
536}
537
538static inline void
539nl_add_attr_via(struct nlmsghdr *h, uint bufsize, ip_addr ipa)
540{
541 struct rtattr *nest = nl_open_attr(h, bufsize, RTA_VIA);
542 struct rtvia *via = RTA_DATA(nest);
543
544 h->nlmsg_len += sizeof(*via);
545
546 if (ipa_is_ip4(ipa)) {
547 ip4_addr ip4 = ipa_to_ip4(ipa);
548 ip4 = ip4_hton(ip4);
549 via->rtvia_family = AF_INET;
550 memcpy(via->rtvia_addr, &ip4, sizeof(ip4));
551 h->nlmsg_len += sizeof(ip4);
552 } else {
553 ip6_addr ip6 = ipa_to_ip6(ipa);
554 ip6 = ip6_hton(ip6);
555 via->rtvia_family = AF_INET6;
556 memcpy(via->rtvia_addr, &ip6, sizeof(ip6));
557 h->nlmsg_len += sizeof(ip6);
558 }
559
560 nl_close_attr(h, nest);
95616c82
OZ
561}
562
9fdf9d29
OZ
563static inline struct rtnexthop *
564nl_open_nexthop(struct nlmsghdr *h, uint bufsize)
565{
566 uint pos = NLMSG_ALIGN(h->nlmsg_len);
567 uint len = RTNH_LENGTH(0);
568
569 if (pos + len > bufsize)
570 bug("nl_open_nexthop: packet buffer overflow");
571
572 h->nlmsg_len = pos + len;
573
574 return (void *)h + pos;
575}
576
577static inline void
578nl_close_nexthop(struct nlmsghdr *h, struct rtnexthop *nh)
579{
580 nh->rtnh_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)nh;
581}
95616c82 582
d14f8c3c
MM
583static inline void
584nl_add_nexthop(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af)
585{
586 if (nh->labels > 0)
587 if (af == AF_MPLS)
588 nl_add_attr_mpls(h, bufsize, RTA_NEWDST, nh->labels, nh->label);
589 else
590 nl_add_attr_mpls_encap(h, bufsize, nh->labels, nh->label);
591
592 if (ipa_nonzero(nh->gw))
593 if (af == AF_MPLS)
594 nl_add_attr_via(h, bufsize, nh->gw);
595 else
596 nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
597}
598
95616c82 599static void
d14f8c3c 600nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af)
95616c82 601{
9fdf9d29
OZ
602 struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH);
603
95616c82 604 for (; nh; nh = nh->next)
9fdf9d29
OZ
605 {
606 struct rtnexthop *rtnh = nl_open_nexthop(h, bufsize);
95616c82 607
9fdf9d29
OZ
608 rtnh->rtnh_flags = 0;
609 rtnh->rtnh_hops = nh->weight;
610 rtnh->rtnh_ifindex = nh->iface->index;
95616c82 611
d14f8c3c 612 nl_add_nexthop(h, bufsize, nh, af);
95616c82 613
9fdf9d29
OZ
614 nl_close_nexthop(h, rtnh);
615 }
616
617 nl_close_attr(h, a);
618}
95616c82 619
4e276a89 620static struct nexthop *
95616c82
OZ
621nl_parse_multipath(struct krt_proto *p, struct rtattr *ra)
622{
623 /* Temporary buffer for multicast nexthops */
4e276a89 624 static struct nexthop *nh_buffer;
95616c82
OZ
625 static int nh_buf_size; /* in number of structures */
626 static int nh_buf_used;
627
ad276157 628 struct rtattr *a[BIRD_RTA_MAX];
95616c82 629 struct rtnexthop *nh = RTA_DATA(ra);
4e276a89 630 struct nexthop *rv, *first, **last;
3e236955 631 unsigned len = RTA_PAYLOAD(ra);
95616c82
OZ
632
633 first = NULL;
634 last = &first;
635 nh_buf_used = 0;
636
637 while (len)
638 {
639 /* Use RTNH_OK(nh,len) ?? */
640 if ((len < sizeof(*nh)) || (len < nh->rtnh_len))
641 return NULL;
642
643 if (nh_buf_used == nh_buf_size)
644 {
645 nh_buf_size = nh_buf_size ? (nh_buf_size * 2) : 4;
d14f8c3c 646 nh_buffer = xrealloc(nh_buffer, nh_buf_size * NEXTHOP_MAX_SIZE);
95616c82
OZ
647 }
648 *last = rv = nh_buffer + nh_buf_used++;
649 rv->next = NULL;
650 last = &(rv->next);
651
652 rv->weight = nh->rtnh_hops;
653 rv->iface = if_find_by_index(nh->rtnh_ifindex);
654 if (!rv->iface)
655 return NULL;
656
657 /* Nonexistent RTNH_PAYLOAD ?? */
658 nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0);
4e276a89 659 nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want4, a, sizeof(a));
95616c82
OZ
660 if (a[RTA_GATEWAY])
661 {
23c212e7 662 rv->gw = rta_get_ipa(a[RTA_GATEWAY]);
95616c82 663
23c212e7
OZ
664 neighbor *nbr;
665 nbr = neigh_find2(&p->p, &rv->gw, rv->iface,
666 (nh->rtnh_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0);
667 if (!nbr || (nbr->scope == SCOPE_HOST))
95616c82
OZ
668 return NULL;
669 }
670 else
d14f8c3c
MM
671 rv->gw = IPA_NONE;
672 if (a[RTA_ENCAP_TYPE])
673 {
674 if (rta_get_u16(a[RTA_ENCAP_TYPE]) != LWTUNNEL_ENCAP_MPLS) {
675 log(L_WARN "KRT: Unknown encapsulation method %d in multipath", rta_get_u16(a[RTA_ENCAP_TYPE]));
676 return NULL;
677 }
678
679 struct rtattr *enca[BIRD_RTA_MAX];
680 nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
681 nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
682 rv->labels = rta_get_mpls(enca[RTA_DST], rv->label);
683 break;
684 }
685
95616c82
OZ
686
687 len -= NLMSG_ALIGN(nh->rtnh_len);
688 nh = RTNH_NEXT(nh);
689 }
690
691 return first;
692}
693
9fdf9d29
OZ
694static void
695nl_add_metrics(struct nlmsghdr *h, uint bufsize, u32 *metrics, int max)
696{
697 struct rtattr *a = nl_open_attr(h, bufsize, RTA_METRICS);
698 int t;
699
700 for (t = 1; t < max; t++)
701 if (metrics[0] & (1 << t))
702 nl_add_attr_u32(h, bufsize, t, metrics[t]);
703
704 nl_close_attr(h, a);
705}
706
707static int
708nl_parse_metrics(struct rtattr *hdr, u32 *metrics, int max)
709{
710 struct rtattr *a = RTA_DATA(hdr);
711 int len = RTA_PAYLOAD(hdr);
712
713 metrics[0] = 0;
714 for (; RTA_OK(a, len); a = RTA_NEXT(a, len))
715 {
716 if (a->rta_type == RTA_UNSPEC)
717 continue;
718
719 if (a->rta_type >= max)
720 continue;
721
722 if (RTA_PAYLOAD(a) != 4)
723 return -1;
724
725 metrics[0] |= 1 << a->rta_type;
acb04cfd 726 metrics[a->rta_type] = rta_get_u32(a);
9fdf9d29
OZ
727 }
728
729 if (len > 0)
730 return -1;
731
732 return 0;
733}
734
95616c82
OZ
735
736/*
737 * Scanning of interfaces
738 */
739
740static void
741nl_parse_link(struct nlmsghdr *h, int scan)
742{
743 struct ifinfomsg *i;
ad276157 744 struct rtattr *a[BIRD_IFLA_MAX];
95616c82
OZ
745 int new = h->nlmsg_type == RTM_NEWLINK;
746 struct iface f = {};
747 struct iface *ifi;
748 char *name;
749 u32 mtu;
ae80a2de 750 uint fl;
95616c82 751
ad276157 752 if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFLA_RTA(i), ifla_attr_want, a, sizeof(a)))
95616c82 753 return;
ad276157 754 if (!a[IFLA_IFNAME] || (RTA_PAYLOAD(a[IFLA_IFNAME]) < 2) || !a[IFLA_MTU])
95616c82 755 {
ad276157
MM
756 /*
757 * IFLA_IFNAME and IFLA_MTU are required, in fact, but there may also come
758 * a message with IFLA_WIRELESS set, where (e.g.) no IFLA_IFNAME exists.
759 * We simply ignore all such messages with IFLA_WIRELESS without notice.
760 */
761
762 if (a[IFLA_WIRELESS])
763 return;
764
765 log(L_ERR "KIF: Malformed message received");
95616c82
OZ
766 return;
767 }
ad276157 768
95616c82 769 name = RTA_DATA(a[IFLA_IFNAME]);
acb04cfd 770 mtu = rta_get_u32(a[IFLA_MTU]);
95616c82
OZ
771
772 ifi = if_find_by_index(i->ifi_index);
773 if (!new)
774 {
775 DBG("KIF: IF%d(%s) goes down\n", i->ifi_index, name);
776 if (!ifi)
777 return;
778
779 if_delete(ifi);
780 }
781 else
782 {
783 DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i->ifi_index, name, mtu, i->ifi_flags);
784 if (ifi && strncmp(ifi->name, name, sizeof(ifi->name)-1))
785 if_delete(ifi);
786
787 strncpy(f.name, name, sizeof(f.name)-1);
788 f.index = i->ifi_index;
789 f.mtu = mtu;
790
791 fl = i->ifi_flags;
792 if (fl & IFF_UP)
793 f.flags |= IF_ADMIN_UP;
794 if (fl & IFF_LOWER_UP)
795 f.flags |= IF_LINK_UP;
796 if (fl & IFF_LOOPBACK) /* Loopback */
797 f.flags |= IF_MULTIACCESS | IF_LOOPBACK | IF_IGNORE;
798 else if (fl & IFF_POINTOPOINT) /* PtP */
799 f.flags |= IF_MULTICAST;
800 else if (fl & IFF_BROADCAST) /* Broadcast */
801 f.flags |= IF_MULTIACCESS | IF_BROADCAST | IF_MULTICAST;
802 else
803 f.flags |= IF_MULTIACCESS; /* NBMA */
3216eb03 804
16a3254c
OZ
805 if (fl & IFF_MULTICAST)
806 f.flags |= IF_MULTICAST;
807
3216eb03
OZ
808 ifi = if_update(&f);
809
810 if (!scan)
811 if_end_partial_update(ifi);
95616c82
OZ
812 }
813}
814
815static void
9b136840 816nl_parse_addr4(struct ifaddrmsg *i, int scan, int new)
95616c82 817{
ad276157 818 struct rtattr *a[BIRD_IFA_MAX];
95616c82 819 struct iface *ifi;
e37d2e3e 820 u32 ifa_flags;
95616c82
OZ
821 int scope;
822
9b136840 823 if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a)))
95616c82 824 return;
ad276157 825
9b136840 826 if (!a[IFA_LOCAL])
ad276157 827 {
9b136840
MM
828 log(L_ERR "KIF: Malformed message received (missing IFA_LOCAL)");
829 return;
ad276157 830 }
ad276157 831 if (!a[IFA_ADDRESS])
95616c82 832 {
ad276157 833 log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
95616c82
OZ
834 return;
835 }
836
837 ifi = if_find_by_index(i->ifa_index);
838 if (!ifi)
839 {
840 log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
841 return;
842 }
843
e37d2e3e
OZ
844 if (a[IFA_FLAGS])
845 ifa_flags = rta_get_u32(a[IFA_FLAGS]);
846 else
847 ifa_flags = i->ifa_flags;
848
9b136840 849 struct ifa ifa;
95616c82
OZ
850 bzero(&ifa, sizeof(ifa));
851 ifa.iface = ifi;
cc5b93f7 852 if (ifa_flags & IFA_F_SECONDARY)
95616c82
OZ
853 ifa.flags |= IA_SECONDARY;
854
9b136840
MM
855 ifa.ip = rta_get_ipa(a[IFA_LOCAL]);
856
d7661fbe 857 if (i->ifa_prefixlen > IP4_MAX_PREFIX_LENGTH)
95616c82
OZ
858 {
859 log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
860 new = 0;
861 }
d7661fbe 862 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH)
95616c82 863 {
9b136840
MM
864 ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
865 net_fill_ip4(&ifa.prefix, rta_get_ip4(a[IFA_ADDRESS]), i->ifa_prefixlen);
95616c82
OZ
866
867 /* It is either a host address or a peer address */
9b136840 868 if (ipa_equal(ifa.ip, ifa.brd))
95616c82
OZ
869 ifa.flags |= IA_HOST;
870 else
871 {
872 ifa.flags |= IA_PEER;
9b136840 873 ifa.opposite = ifa.brd;
95616c82
OZ
874 }
875 }
876 else
877 {
9b136840
MM
878 net_fill_ip4(&ifa.prefix, ipa_to_ip4(ifa.ip), i->ifa_prefixlen);
879 net_normalize(&ifa.prefix);
880
d7661fbe 881 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 1)
95616c82
OZ
882 ifa.opposite = ipa_opposite_m1(ifa.ip);
883
d7661fbe 884 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 2)
95616c82
OZ
885 ifa.opposite = ipa_opposite_m2(ifa.ip);
886
887 if ((ifi->flags & IF_BROADCAST) && a[IFA_BROADCAST])
888 {
9b136840
MM
889 ip4_addr xbrd = rta_get_ip4(a[IFA_BROADCAST]);
890 ip4_addr ybrd = ip4_or(ipa_to_ip4(ifa.ip), ip4_not(ip4_mkmask(i->ifa_prefixlen)));
891
892 if (ip4_equal(xbrd, net4_prefix(&ifa.prefix)) || ip4_equal(xbrd, ybrd))
893 ifa.brd = ipa_from_ip4(xbrd);
95616c82 894 else if (ifi->flags & IF_TMP_DOWN) /* Complain only during the first scan */
9b136840 895 {
e691d16a 896 log(L_ERR "KIF: Invalid broadcast address %I4 for %s", xbrd, ifi->name);
9b136840
MM
897 ifa.brd = ipa_from_ip4(ybrd);
898 }
899 }
900 }
901
902 scope = ipa_classify(ifa.ip);
903 if (scope < 0)
904 {
905 log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
906 return;
907 }
908 ifa.scope = scope & IADDR_SCOPE_MASK;
909
910 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
911 ifi->index, ifi->name,
912 new ? "added" : "removed",
913 ifa.ip, ifa.flags, ifa.prefix, ifa.brd, ifa.opposite);
914
915 if (new)
916 ifa_update(&ifa);
917 else
918 ifa_delete(&ifa);
919
920 if (!scan)
921 if_end_partial_update(ifi);
922}
923
924static void
925nl_parse_addr6(struct ifaddrmsg *i, int scan, int new)
926{
927 struct rtattr *a[BIRD_IFA_MAX];
928 struct iface *ifi;
cc5b93f7 929 u32 ifa_flags;
9b136840
MM
930 int scope;
931
932 if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a)))
933 return;
934
935 if (!a[IFA_ADDRESS])
936 {
937 log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
938 return;
939 }
940
941 ifi = if_find_by_index(i->ifa_index);
942 if (!ifi)
943 {
944 log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
945 return;
946 }
947
cc5b93f7
OZ
948 if (a[IFA_FLAGS])
949 ifa_flags = rta_get_u32(a[IFA_FLAGS]);
950 else
951 ifa_flags = i->ifa_flags;
952
9b136840
MM
953 struct ifa ifa;
954 bzero(&ifa, sizeof(ifa));
955 ifa.iface = ifi;
e37d2e3e 956 if (ifa_flags & IFA_F_SECONDARY)
9b136840
MM
957 ifa.flags |= IA_SECONDARY;
958
e37d2e3e
OZ
959 /* Ignore tentative addresses silently */
960 if (ifa_flags & IFA_F_TENTATIVE)
961 return;
9b136840 962
95616c82 963 /* IFA_LOCAL can be unset for IPv6 interfaces */
9b136840
MM
964 ifa.ip = rta_get_ipa(a[IFA_LOCAL] ? : a[IFA_ADDRESS]);
965
d7661fbe 966 if (i->ifa_prefixlen > IP6_MAX_PREFIX_LENGTH)
9b136840
MM
967 {
968 log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
969 new = 0;
970 }
d7661fbe 971 if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH)
9b136840
MM
972 {
973 ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
974 net_fill_ip6(&ifa.prefix, rta_get_ip6(a[IFA_ADDRESS]), i->ifa_prefixlen);
975
976 /* It is either a host address or a peer address */
977 if (ipa_equal(ifa.ip, ifa.brd))
978 ifa.flags |= IA_HOST;
979 else
980 {
981 ifa.flags |= IA_PEER;
982 ifa.opposite = ifa.brd;
95616c82 983 }
9b136840
MM
984 }
985 else
986 {
987 net_fill_ip6(&ifa.prefix, ipa_to_ip6(ifa.ip), i->ifa_prefixlen);
988 net_normalize(&ifa.prefix);
989
d7661fbe 990 if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH - 1)
9b136840 991 ifa.opposite = ipa_opposite_m1(ifa.ip);
95616c82
OZ
992 }
993
994 scope = ipa_classify(ifa.ip);
995 if (scope < 0)
996 {
997 log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
998 return;
999 }
1000 ifa.scope = scope & IADDR_SCOPE_MASK;
1001
9b136840 1002 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
95616c82
OZ
1003 ifi->index, ifi->name,
1004 new ? "added" : "removed",
9b136840 1005 ifa.ip, ifa.flags, ifa.prefix, ifa.brd, ifa.opposite);
3216eb03 1006
95616c82
OZ
1007 if (new)
1008 ifa_update(&ifa);
1009 else
1010 ifa_delete(&ifa);
3216eb03
OZ
1011
1012 if (!scan)
1013 if_end_partial_update(ifi);
95616c82
OZ
1014}
1015
9b136840
MM
1016static void
1017nl_parse_addr(struct nlmsghdr *h, int scan)
1018{
1019 struct ifaddrmsg *i;
1020
1021 if (!(i = nl_checkin(h, sizeof(*i))))
1022 return;
1023
1024 int new = (h->nlmsg_type == RTM_NEWADDR);
1025
1026 switch (i->ifa_family)
1027 {
9b136840
MM
1028 case AF_INET:
1029 return nl_parse_addr4(i, scan, new);
29a64162 1030
9b136840
MM
1031 case AF_INET6:
1032 return nl_parse_addr6(i, scan, new);
9b136840
MM
1033 }
1034}
1035
95616c82
OZ
1036void
1037kif_do_scan(struct kif_proto *p UNUSED)
1038{
1039 struct nlmsghdr *h;
1040
1041 if_start_update();
1042
86c3eea0 1043 nl_request_dump(AF_UNSPEC, RTM_GETLINK);
95616c82
OZ
1044 while (h = nl_get_scan())
1045 if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)
1046 nl_parse_link(h, 1);
1047 else
1048 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
29a64162 1049
d7661fbe 1050 nl_request_dump(AF_INET, RTM_GETADDR);
95616c82
OZ
1051 while (h = nl_get_scan())
1052 if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
3216eb03 1053 nl_parse_addr(h, 1);
95616c82
OZ
1054 else
1055 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
29a64162 1056
d7661fbe
MM
1057 nl_request_dump(AF_INET6, RTM_GETADDR);
1058 while (h = nl_get_scan())
1059 if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
1060 nl_parse_addr(h, 1);
1061 else
1062 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
29a64162 1063
95616c82
OZ
1064 if_end_update();
1065}
1066
1067/*
1068 * Routes
1069 */
1070
9ddbfbdd
MM
1071static inline u32
1072krt_table_id(struct krt_proto *p)
1073{
1074 return KRT_CF->sys.table_id;
1075}
1076
1077static HASH(struct krt_proto) nl_table_map;
1078
29a64162
OZ
1079#define RTH_KEY(p) p->af, krt_table_id(p)
1080#define RTH_NEXT(p) p->sys.hash_next
1081#define RTH_EQ(a1,i1,a2,i2) a1 == a2 && i1 == i2
1082#define RTH_FN(a,i) a ^ u32_hash(i)
9ddbfbdd
MM
1083
1084#define RTH_REHASH rth_rehash
1085#define RTH_PARAMS /8, *2, 2, 2, 6, 20
1086
1087HASH_DEFINE_REHASH_FN(RTH, struct krt_proto)
95616c82
OZ
1088
1089int
1090krt_capable(rte *e)
1091{
1092 rta *a = e->attrs;
1093
95616c82
OZ
1094 switch (a->dest)
1095 {
4e276a89
MM
1096 case RTD_UNICAST:
1097 for (struct nexthop *nh = &(a->nh); nh; nh = nh->next)
1098 if (nh->iface)
1099 return 1;
1100 return 0;
95616c82
OZ
1101 case RTD_BLACKHOLE:
1102 case RTD_UNREACHABLE:
1103 case RTD_PROHIBIT:
95616c82
OZ
1104 break;
1105 default:
1106 return 0;
1107 }
1108 return 1;
1109}
1110
1111static inline int
4e276a89 1112nh_bufsize(struct nexthop *nh)
95616c82
OZ
1113{
1114 int rv = 0;
1115 for (; nh != NULL; nh = nh->next)
9fdf9d29 1116 rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr)));
95616c82
OZ
1117 return rv;
1118}
1119
1120static int
4e276a89 1121nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int dest, struct nexthop *nh)
95616c82
OZ
1122{
1123 eattr *ea;
1124 net *net = e->net;
1125 rta *a = e->attrs;
4e276a89 1126 int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(&(a->nh));
4adcb9df 1127 u32 priority = 0;
a8caff32 1128
95616c82
OZ
1129 struct {
1130 struct nlmsghdr h;
1131 struct rtmsg r;
a8caff32
MM
1132 char buf[0];
1133 } *r;
1134
1135 int rsize = sizeof(*r) + bufsize;
1136 r = alloca(rsize);
95616c82 1137
cc5b93f7 1138 DBG("nl_send_route(%N,op=%x)\n", net->n.addr, op);
95616c82 1139
a8caff32
MM
1140 bzero(&r->h, sizeof(r->h));
1141 bzero(&r->r, sizeof(r->r));
cc5b93f7 1142 r->h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE;
a8caff32 1143 r->h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
cc5b93f7 1144 r->h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK;
95616c82 1145
a8caff32
MM
1146 r->r.rtm_family = p->af;
1147 r->r.rtm_dst_len = net_pxlen(net->n.addr);
1148 r->r.rtm_protocol = RTPROT_BIRD;
1149 r->r.rtm_scope = RT_SCOPE_UNIVERSE;
d14f8c3c
MM
1150 if (p->af == AF_MPLS)
1151 {
1152 u32 label = net_mpls(net->n.addr);
1153 nl_add_attr_mpls(&r->h, rsize, RTA_DST, 1, &label);
1154 }
1155 else
1156 nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(net->n.addr));
95616c82 1157
2feaa693
OZ
1158 /*
1159 * Strange behavior for RTM_DELROUTE:
1160 * 1) rtm_family is ignored in IPv6, works for IPv4
1161 * 2) not setting RTA_PRIORITY is different from setting default value (on IPv6)
1162 * 3) not setting RTA_PRIORITY is equivalent to setting 0, which is wildcard
1163 */
1164
9ddbfbdd 1165 if (krt_table_id(p) < 256)
a8caff32 1166 r->r.rtm_table = krt_table_id(p);
9ddbfbdd 1167 else
a8caff32 1168 nl_add_attr_u32(&r->h, rsize, RTA_TABLE, krt_table_id(p));
9ddbfbdd 1169
4adcb9df
OZ
1170 if (a->source == RTS_DUMMY)
1171 priority = e->u.krt.metric;
1172 else if (KRT_CF->sys.metric)
1173 priority = KRT_CF->sys.metric;
1174 else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC)))
1175 priority = ea->u.data;
78a2cc28 1176
4adcb9df 1177 if (priority)
cc5b93f7 1178 nl_add_attr_u32(&r->h, sizeof(r), RTA_PRIORITY, priority);
78a2cc28 1179
2feaa693
OZ
1180 /* For route delete, we do not specify remaining route attributes */
1181 if (op == NL_OP_DELETE)
1182 goto dest;
78a2cc28 1183
6e75d0d2
OZ
1184 /* Default scope is LINK for device routes, UNIVERSE otherwise */
1185 if (ea = ea_find(eattrs, EA_KRT_SCOPE))
cc5b93f7 1186 r->r.rtm_scope = ea->u.data;
6e75d0d2 1187 else
4e276a89 1188 r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
95616c82
OZ
1189
1190 if (ea = ea_find(eattrs, EA_KRT_PREFSRC))
a8caff32 1191 nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data);
95616c82
OZ
1192
1193 if (ea = ea_find(eattrs, EA_KRT_REALM))
a8caff32 1194 nl_add_attr_u32(&r->h, rsize, RTA_FLOW, ea->u.data);
95616c82 1195
9fdf9d29
OZ
1196
1197 u32 metrics[KRT_METRICS_MAX];
1198 metrics[0] = 0;
1199
1200 struct ea_walk_state ews = { .eattrs = eattrs };
1201 while (ea = ea_walk(&ews, EA_KRT_METRICS, KRT_METRICS_MAX))
1202 {
1203 int id = ea->id - EA_KRT_METRICS;
1204 metrics[0] |= 1 << id;
1205 metrics[id] = ea->u.data;
1206 }
1207
1208 if (metrics[0])
a8caff32 1209 nl_add_metrics(&r->h, rsize, metrics, KRT_METRICS_MAX);
9fdf9d29
OZ
1210
1211
2feaa693 1212dest:
95616c82 1213 /* a->iface != NULL checked in krt_capable() for router and device routes */
2feaa693 1214 switch (dest)
95616c82 1215 {
4e276a89 1216 case RTD_UNICAST:
a8caff32 1217 r->r.rtm_type = RTN_UNICAST;
4e276a89 1218 if (nh->next && !krt_ecmp6(p))
d14f8c3c 1219 nl_add_multipath(&r->h, rsize, nh, p->af);
4e276a89
MM
1220 else
1221 {
1222 nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->iface->index);
d14f8c3c 1223 nl_add_nexthop(&r->h, rsize, nh, p->af);
4e276a89 1224 }
95616c82
OZ
1225 break;
1226 case RTD_BLACKHOLE:
a8caff32 1227 r->r.rtm_type = RTN_BLACKHOLE;
95616c82
OZ
1228 break;
1229 case RTD_UNREACHABLE:
a8caff32 1230 r->r.rtm_type = RTN_UNREACHABLE;
95616c82
OZ
1231 break;
1232 case RTD_PROHIBIT:
a8caff32 1233 r->r.rtm_type = RTN_PROHIBIT;
95616c82 1234 break;
2feaa693
OZ
1235 case RTD_NONE:
1236 break;
95616c82
OZ
1237 default:
1238 bug("krt_capable inconsistent with nl_send_route");
1239 }
1240
2feaa693 1241 /* Ignore missing for DELETE */
cc5b93f7 1242 return nl_exchange(&r->h, (op == NL_OP_DELETE));
2feaa693
OZ
1243}
1244
1245static inline int
1246nl_add_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs)
1247{
1248 rta *a = e->attrs;
1249 int err = 0;
1250
4e276a89 1251 if (krt_ecmp6(p) && a->nh.next)
2feaa693 1252 {
4e276a89 1253 struct nexthop *nh = &(a->nh);
2feaa693 1254
4e276a89 1255 err = nl_send_route(p, e, eattrs, NL_OP_ADD, RTD_UNICAST, nh);
2feaa693
OZ
1256 if (err < 0)
1257 return err;
1258
1259 for (nh = nh->next; nh; nh = nh->next)
4e276a89 1260 err += nl_send_route(p, e, eattrs, NL_OP_APPEND, RTD_UNICAST, nh);
2feaa693
OZ
1261
1262 return err;
1263 }
1264
4e276a89 1265 return nl_send_route(p, e, eattrs, NL_OP_ADD, a->dest, &(a->nh));
2feaa693
OZ
1266}
1267
1268static inline int
1269nl_delete_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs)
1270{
1271 int err = 0;
1272
1273 /* For IPv6, we just repeatedly request DELETE until we get error */
1274 do
4e276a89 1275 err = nl_send_route(p, e, eattrs, NL_OP_DELETE, RTD_NONE, NULL);
2feaa693
OZ
1276 while (krt_ecmp6(p) && !err);
1277
1278 return err;
95616c82
OZ
1279}
1280
1281void
1282krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old, struct ea_list *eattrs)
1283{
1284 int err = 0;
1285
1286 /*
2feaa693
OZ
1287 * We could use NL_OP_REPLACE, but route replace on Linux has some problems:
1288 *
1289 * 1) Does not check for matching rtm_protocol
1290 * 2) Has broken semantics for IPv6 ECMP
1291 * 3) Crashes some kernel version when used for IPv6 ECMP
1292 *
1293 * So we use NL_OP_DELETE and then NL_OP_ADD. We also do not trust the old
1294 * route value, so we do not try to optimize IPv6 ECMP reconfigurations.
95616c82
OZ
1295 */
1296
1297 if (old)
2feaa693 1298 nl_delete_rte(p, old, eattrs);
95616c82
OZ
1299
1300 if (new)
2feaa693 1301 err = nl_add_rte(p, new, eattrs);
95616c82
OZ
1302
1303 if (err < 0)
1304 n->n.flags |= KRF_SYNC_ERROR;
1305 else
1306 n->n.flags &= ~KRF_SYNC_ERROR;
1307}
1308
1309
4e276a89
MM
1310static inline struct nexthop *
1311nl_alloc_nexthop(struct nl_parse_state *s, ip_addr gw, struct iface *iface, byte weight)
2feaa693 1312{
4e276a89 1313 struct nexthop *nh = lp_alloc(s->pool, sizeof(struct nexthop));
2feaa693
OZ
1314
1315 nh->gw = gw;
1316 nh->iface = iface;
1317 nh->next = NULL;
1318 nh->weight = weight;
1319
1320 return nh;
1321}
1322
1323static int
1324nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint priority, uint krt_type)
1325{
1326 /* Route merging must be active */
1327 if (!s->merge)
1328 return 0;
1329
1330 /* Saved and new route must have same network, proto/table, and priority */
1331 if ((s->net != net) || (s->proto != p) || (s->krt_metric != priority))
1332 return 0;
1333
1334 /* Both must be regular unicast routes */
1335 if ((s->krt_type != RTN_UNICAST) || (krt_type != RTN_UNICAST))
1336 return 0;
1337
1338 return 1;
1339}
1340
1341static void
1342nl_announce_route(struct nl_parse_state *s)
1343{
1344 rte *e = rte_get_temp(s->attrs);
1345 e->net = s->net;
1346 e->u.krt.src = s->krt_src;
1347 e->u.krt.proto = s->krt_proto;
1348 e->u.krt.seen = 0;
1349 e->u.krt.best = 0;
1350 e->u.krt.metric = s->krt_metric;
1351
1352 if (s->scan)
1353 krt_got_route(s->proto, e);
1354 else
1355 krt_got_route_async(s->proto, e, s->new);
1356
1357 s->net = NULL;
1358 s->attrs = NULL;
1359 s->proto = NULL;
1360 lp_flush(s->pool);
1361}
1362
1363static inline void
1364nl_parse_begin(struct nl_parse_state *s, int scan, int merge)
1365{
1366 memset(s, 0, sizeof (struct nl_parse_state));
1367 s->pool = nl_linpool;
1368 s->scan = scan;
1369 s->merge = merge;
1370}
1371
1372static inline void
1373nl_parse_end(struct nl_parse_state *s)
1374{
1375 if (s->net)
1376 nl_announce_route(s);
1377}
1378
1379
95616c82
OZ
1380#define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
1381
1382static void
2feaa693 1383nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
95616c82
OZ
1384{
1385 struct krt_proto *p;
1386 struct rtmsg *i;
ad276157 1387 struct rtattr *a[BIRD_RTA_MAX];
95616c82
OZ
1388 int new = h->nlmsg_type == RTM_NEWROUTE;
1389
29a64162 1390 net_addr dst;
95616c82 1391 u32 oif = ~0;
29a64162 1392 u32 table_id;
2feaa693 1393 u32 priority = 0;
6e75d0d2 1394 u32 def_scope = RT_SCOPE_UNIVERSE;
95616c82
OZ
1395 int src;
1396
ad276157 1397 if (!(i = nl_checkin(h, sizeof(*i))))
95616c82 1398 return;
ad276157
MM
1399
1400 switch (i->rtm_family)
95616c82 1401 {
29a64162
OZ
1402 case AF_INET:
1403 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want4, a, sizeof(a)))
1404 return;
1405
1406 if (a[RTA_DST])
1407 net_fill_ip4(&dst, rta_get_ip4(a[RTA_DST]), i->rtm_dst_len);
1408 else
1409 net_fill_ip4(&dst, IP4_NONE, 0);
1410 break;
1411
cc5b93f7
OZ
1412 case AF_INET6:
1413 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a)))
1414 return;
29a64162
OZ
1415
1416 if (a[RTA_DST])
1417 net_fill_ip6(&dst, rta_get_ip6(a[RTA_DST]), i->rtm_dst_len);
1418 else
1419 net_fill_ip6(&dst, IP6_NONE, 0);
1420 break;
1421
d14f8c3c
MM
1422 case AF_MPLS:
1423 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want_mpls, a, sizeof(a)))
1424 return;
1425
1426 if (a[RTA_DST])
1427 if (rta_get_mpls(a[RTA_DST], rta_mpls_stack) == 1)
1428 net_fill_mpls(&dst, rta_mpls_stack[0]);
1429 else
1430 log(L_WARN "KRT: Got multi-label MPLS RTA_DST");
1431 else
1432 return; /* No support for MPLS routes without RTA_DST */
1433 break;
1434
29a64162
OZ
1435 default:
1436 return;
95616c82
OZ
1437 }
1438
95616c82 1439 if (a[RTA_OIF])
acb04cfd 1440 oif = rta_get_u32(a[RTA_OIF]);
95616c82 1441
9ddbfbdd 1442 if (a[RTA_TABLE])
29a64162 1443 table_id = rta_get_u32(a[RTA_TABLE]);
9ddbfbdd 1444 else
29a64162 1445 table_id = i->rtm_table;
9ddbfbdd 1446
29a64162
OZ
1447 /* Do we know this table? */
1448 p = HASH_FIND(nl_table_map, RTH, i->rtm_family, table_id);
95616c82 1449 if (!p)
9ddbfbdd 1450 SKIP("unknown table %d\n", table);
95616c82 1451
95616c82
OZ
1452 if (a[RTA_IIF])
1453 SKIP("IIF set\n");
29a64162 1454
95616c82
OZ
1455 if (i->rtm_tos != 0) /* We don't support TOS */
1456 SKIP("TOS %02x\n", i->rtm_tos);
95616c82 1457
2feaa693 1458 if (s->scan && !new)
95616c82
OZ
1459 SKIP("RTM_DELROUTE in scan\n");
1460
2feaa693
OZ
1461 if (a[RTA_PRIORITY])
1462 priority = rta_get_u32(a[RTA_PRIORITY]);
1463
9b136840 1464 int c = net_classify(&dst);
95616c82
OZ
1465 if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
1466 SKIP("strange class/scope\n");
1467
95616c82
OZ
1468 switch (i->rtm_protocol)
1469 {
1470 case RTPROT_UNSPEC:
1471 SKIP("proto unspec\n");
1472
1473 case RTPROT_REDIRECT:
1474 src = KRT_SRC_REDIRECT;
1475 break;
1476
1477 case RTPROT_KERNEL:
1478 src = KRT_SRC_KERNEL;
1479 return;
1480
1481 case RTPROT_BIRD:
2feaa693 1482 if (!s->scan)
95616c82
OZ
1483 SKIP("echo\n");
1484 src = KRT_SRC_BIRD;
1485 break;
1486
1487 case RTPROT_BOOT:
1488 default:
1489 src = KRT_SRC_ALIEN;
1490 }
1491
f4a60a9b 1492 net *net = net_get(p->p.main_channel->table, &dst);
95616c82 1493
2feaa693
OZ
1494 if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type))
1495 nl_announce_route(s);
1496
d14f8c3c 1497 rta *ra = lp_allocz(s->pool, RTA_MAX_SIZE);
2feaa693
OZ
1498 ra->src = p->p.main_source;
1499 ra->source = RTS_INHERIT;
1500 ra->scope = SCOPE_UNIVERSE;
95616c82
OZ
1501
1502 switch (i->rtm_type)
1503 {
1504 case RTN_UNICAST:
1505
ad276157 1506 if (a[RTA_MULTIPATH] && (i->rtm_family == AF_INET))
95616c82 1507 {
4e276a89
MM
1508 struct nexthop *nh = nl_parse_multipath(p, a[RTA_MULTIPATH]);
1509 if (!nh)
95616c82 1510 {
fe9f1a6d 1511 log(L_ERR "KRT: Received strange multipath route %N", net->n.addr);
95616c82
OZ
1512 return;
1513 }
9fdf9d29 1514
4e276a89 1515 nexthop_link(ra, nh);
95616c82
OZ
1516 break;
1517 }
1518
4e276a89
MM
1519 ra->nh.iface = if_find_by_index(oif);
1520 if (!ra->nh.iface)
95616c82 1521 {
fe9f1a6d 1522 log(L_ERR "KRT: Received route %N with unknown ifindex %u", net->n.addr, oif);
95616c82
OZ
1523 return;
1524 }
1525
d14f8c3c 1526 if ((i->rtm_family != AF_MPLS) && a[RTA_GATEWAY] || (i->rtm_family == AF_MPLS) && a[RTA_VIA])
95616c82 1527 {
d14f8c3c
MM
1528 if (i->rtm_family == AF_MPLS)
1529 ra->nh.gw = rta_get_via(a[RTA_VIA]);
1530 else
1531 ra->nh.gw = rta_get_ipa(a[RTA_GATEWAY]);
95616c82
OZ
1532
1533 /* Silently skip strange 6to4 routes */
0bf95f99 1534 const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96);
4e276a89 1535 if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->nh.gw, (net_addr *) &sit))
95616c82
OZ
1536 return;
1537
23c212e7 1538 neighbor *nbr;
d14f8c3c 1539 nbr = neigh_find2(&p->p, &(ra->nh.gw), ra->nh.iface,
23c212e7
OZ
1540 (i->rtm_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0);
1541 if (!nbr || (nbr->scope == SCOPE_HOST))
95616c82 1542 {
4e276a89
MM
1543 log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr,
1544 ra->nh.gw);
95616c82
OZ
1545 return;
1546 }
1547 }
95616c82
OZ
1548
1549 break;
1550 case RTN_BLACKHOLE:
2feaa693 1551 ra->dest = RTD_BLACKHOLE;
95616c82
OZ
1552 break;
1553 case RTN_UNREACHABLE:
2feaa693 1554 ra->dest = RTD_UNREACHABLE;
95616c82
OZ
1555 break;
1556 case RTN_PROHIBIT:
2feaa693 1557 ra->dest = RTD_PROHIBIT;
95616c82
OZ
1558 break;
1559 /* FIXME: What about RTN_THROW? */
1560 default:
1561 SKIP("type %d\n", i->rtm_type);
1562 return;
1563 }
1564
d14f8c3c
MM
1565 int labels = 0;
1566 if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !ra->nh.next)
1567 labels = rta_get_mpls(a[RTA_NEWDST], ra->nh.label);
1568
1569 if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !ra->nh.next)
1570 {
1571 switch (rta_get_u16(a[RTA_ENCAP_TYPE]))
1572 {
1573 case LWTUNNEL_ENCAP_MPLS:
1574 {
1575 struct rtattr *enca[BIRD_RTA_MAX];
1576 nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
1577 nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
1578 labels = rta_get_mpls(enca[RTA_DST], ra->nh.label);
1579 break;
1580 }
1581 default:
1582 SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE]));
1583 break;
1584 }
1585 }
1586
1587 if (labels < 0)
1588 {
1589 log(L_WARN "KRT: Too long MPLS stack received, ignoring.");
1590 ra->nh.labels = 0;
1591 }
1592 else
1593 ra->nh.labels = labels;
1594
1595 rte *e = rte_get_temp(ra);
1596 e->net = net;
1597 e->u.krt.src = src;
1598 e->u.krt.proto = i->rtm_protocol;
1599 e->u.krt.seen = 0;
1600 e->u.krt.best = 0;
1601 e->u.krt.metric = 0;
1602
6e75d0d2
OZ
1603 if (i->rtm_scope != def_scope)
1604 {
1605 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1606 ea->next = ra->eattrs;
1607 ra->eattrs = ea;
1608 ea->flags = EALF_SORTED;
1609 ea->count = 1;
1610 ea->attrs[0].id = EA_KRT_SCOPE;
1611 ea->attrs[0].flags = 0;
1612 ea->attrs[0].type = EAF_TYPE_INT;
1613 ea->attrs[0].u.data = i->rtm_scope;
1614 }
95616c82 1615
d14f8c3c
MM
1616 if (a[RTA_PRIORITY])
1617 e->u.krt.metric = rta_get_u32(a[RTA_PRIORITY]);
1618
95616c82
OZ
1619 if (a[RTA_PREFSRC])
1620 {
9b136840 1621 ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]);
95616c82 1622
2feaa693
OZ
1623 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1624 ea->next = ra->eattrs;
1625 ra->eattrs = ea;
95616c82
OZ
1626 ea->flags = EALF_SORTED;
1627 ea->count = 1;
1628 ea->attrs[0].id = EA_KRT_PREFSRC;
1629 ea->attrs[0].flags = 0;
1630 ea->attrs[0].type = EAF_TYPE_IP_ADDRESS;
2feaa693 1631 ea->attrs[0].u.ptr = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps));
95616c82
OZ
1632 ea->attrs[0].u.ptr->length = sizeof(ps);
1633 memcpy(ea->attrs[0].u.ptr->data, &ps, sizeof(ps));
1634 }
1635
1636 if (a[RTA_FLOW])
1637 {
2feaa693
OZ
1638 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1639 ea->next = ra->eattrs;
1640 ra->eattrs = ea;
95616c82
OZ
1641 ea->flags = EALF_SORTED;
1642 ea->count = 1;
1643 ea->attrs[0].id = EA_KRT_REALM;
1644 ea->attrs[0].flags = 0;
1645 ea->attrs[0].type = EAF_TYPE_INT;
acb04cfd 1646 ea->attrs[0].u.data = rta_get_u32(a[RTA_FLOW]);
95616c82
OZ
1647 }
1648
9fdf9d29
OZ
1649 if (a[RTA_METRICS])
1650 {
1651 u32 metrics[KRT_METRICS_MAX];
2feaa693 1652 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr));
9fdf9d29
OZ
1653 int t, n = 0;
1654
1655 if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0)
1656 {
fe9f1a6d 1657 log(L_ERR "KRT: Received route %N with strange RTA_METRICS attribute", net->n.addr);
9fdf9d29
OZ
1658 return;
1659 }
1660
1661 for (t = 1; t < KRT_METRICS_MAX; t++)
1662 if (metrics[0] & (1 << t))
1663 {
1664 ea->attrs[n].id = EA_CODE(EAP_KRT, KRT_METRICS_OFFSET + t);
1665 ea->attrs[n].flags = 0;
1666 ea->attrs[n].type = EAF_TYPE_INT; /* FIXME: Some are EAF_TYPE_BITFIELD */
1667 ea->attrs[n].u.data = metrics[t];
1668 n++;
1669 }
1670
1671 if (n > 0)
1672 {
2feaa693 1673 ea->next = ra->eattrs;
9fdf9d29
OZ
1674 ea->flags = EALF_SORTED;
1675 ea->count = n;
2feaa693 1676 ra->eattrs = ea;
9fdf9d29
OZ
1677 }
1678 }
1679
2feaa693
OZ
1680 /*
1681 * Ideally, now we would send the received route to the rest of kernel code.
1682 * But IPv6 ECMP routes are sent as a sequence of routes, so we postpone it
1683 * and merge next hops until the end of the sequence.
1684 */
1685
1686 if (!s->net)
1687 {
1688 /* Store the new route */
1689 s->net = net;
1690 s->attrs = ra;
1691 s->proto = p;
1692 s->new = new;
1693 s->krt_src = src;
1694 s->krt_type = i->rtm_type;
1695 s->krt_proto = i->rtm_protocol;
1696 s->krt_metric = priority;
1697 }
95616c82 1698 else
2feaa693
OZ
1699 {
1700 /* Merge next hops with the stored route */
1701 rta *a = s->attrs;
1702
4e276a89 1703 nexthop_insert(&a->nh, &ra->nh);
2feaa693 1704 }
95616c82
OZ
1705}
1706
1707void
1708krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
1709{
1710 struct nlmsghdr *h;
2feaa693 1711 struct nl_parse_state s;
95616c82 1712
cc5b93f7 1713 nl_parse_begin(&s, 1, 0);
d7661fbe 1714 nl_request_dump(AF_INET, RTM_GETROUTE);
95616c82
OZ
1715 while (h = nl_get_scan())
1716 if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
2feaa693 1717 nl_parse_route(&s, h);
95616c82
OZ
1718 else
1719 log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
cc5b93f7 1720 nl_parse_end(&s);
29a64162 1721
cc5b93f7 1722 nl_parse_begin(&s, 1, 1);
d7661fbe
MM
1723 nl_request_dump(AF_INET6, RTM_GETROUTE);
1724 while (h = nl_get_scan())
1725 if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
cc5b93f7 1726 nl_parse_route(&s, h);
d7661fbe
MM
1727 else
1728 log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
2feaa693 1729 nl_parse_end(&s);
d14f8c3c
MM
1730
1731 nl_parse_begin(&s, 1, 1);
1732 nl_request_dump(AF_MPLS, RTM_GETROUTE);
1733 while (h = nl_get_scan())
1734 if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
1735 nl_parse_route(&s, h);
1736 else
1737 log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
1738 nl_parse_end(&s);
95616c82
OZ
1739}
1740
1741/*
1742 * Asynchronous Netlink interface
1743 */
1744
1745static sock *nl_async_sk; /* BIRD socket for asynchronous notifications */
1746static byte *nl_async_rx_buffer; /* Receive buffer */
1747
1748static void
1749nl_async_msg(struct nlmsghdr *h)
1750{
2feaa693
OZ
1751 struct nl_parse_state s;
1752
95616c82
OZ
1753 switch (h->nlmsg_type)
1754 {
1755 case RTM_NEWROUTE:
1756 case RTM_DELROUTE:
1757 DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);
2feaa693
OZ
1758 nl_parse_begin(&s, 0, 0);
1759 nl_parse_route(&s, h);
1760 nl_parse_end(&s);
95616c82
OZ
1761 break;
1762 case RTM_NEWLINK:
1763 case RTM_DELLINK:
1764 DBG("KRT: Received async link notification (%d)\n", h->nlmsg_type);
1e4891e4
OZ
1765 if (kif_proto)
1766 nl_parse_link(h, 0);
95616c82
OZ
1767 break;
1768 case RTM_NEWADDR:
1769 case RTM_DELADDR:
1770 DBG("KRT: Received async address notification (%d)\n", h->nlmsg_type);
1e4891e4
OZ
1771 if (kif_proto)
1772 nl_parse_addr(h, 0);
95616c82
OZ
1773 break;
1774 default:
1775 DBG("KRT: Received unknown async notification (%d)\n", h->nlmsg_type);
1776 }
1777}
1778
1779static int
3e236955 1780nl_async_hook(sock *sk, uint size UNUSED)
95616c82
OZ
1781{
1782 struct iovec iov = { nl_async_rx_buffer, NL_RX_SIZE };
1783 struct sockaddr_nl sa;
31e9e101
ST
1784 struct msghdr m = {
1785 .msg_name = &sa,
1786 .msg_namelen = sizeof(sa),
1787 .msg_iov = &iov,
1788 .msg_iovlen = 1,
1789 };
95616c82
OZ
1790 struct nlmsghdr *h;
1791 int x;
ae80a2de 1792 uint len;
95616c82
OZ
1793
1794 x = recvmsg(sk->fd, &m, 0);
1795 if (x < 0)
1796 {
1797 if (errno == ENOBUFS)
1798 {
1799 /*
1800 * Netlink reports some packets have been thrown away.
1801 * One day we might react to it by asking for route table
1802 * scan in near future.
1803 */
1804 return 1; /* More data are likely to be ready */
1805 }
1806 else if (errno != EWOULDBLOCK)
1807 log(L_ERR "Netlink recvmsg: %m");
1808 return 0;
1809 }
1810 if (sa.nl_pid) /* It isn't from the kernel */
1811 {
1812 DBG("Non-kernel packet\n");
1813 return 1;
1814 }
1815 h = (void *) nl_async_rx_buffer;
1816 len = x;
1817 if (m.msg_flags & MSG_TRUNC)
1818 {
1819 log(L_WARN "Netlink got truncated asynchronous message");
1820 return 1;
1821 }
1822 while (NLMSG_OK(h, len))
1823 {
1824 nl_async_msg(h);
1825 h = NLMSG_NEXT(h, len);
1826 }
1827 if (len)
1828 log(L_WARN "nl_async_hook: Found packet remnant of size %d", len);
1829 return 1;
1830}
1831
ccd2a3ed
MM
1832static void
1833nl_async_err_hook(sock *sk, int e UNUSED)
1834{
1835 nl_async_hook(sk, 0);
1836}
1837
95616c82
OZ
1838static void
1839nl_open_async(void)
1840{
1841 sock *sk;
1842 struct sockaddr_nl sa;
1843 int fd;
95616c82 1844
f83ce94d 1845 if (nl_async_sk)
95616c82 1846 return;
95616c82
OZ
1847
1848 DBG("KRT: Opening async netlink socket\n");
1849
1850 fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
1851 if (fd < 0)
1852 {
1853 log(L_ERR "Unable to open asynchronous rtnetlink socket: %m");
1854 return;
1855 }
1856
1857 bzero(&sa, sizeof(sa));
1858 sa.nl_family = AF_NETLINK;
29a64162
OZ
1859 sa.nl_groups = RTMGRP_LINK |
1860 RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE |
1861 RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE;
1862
95616c82
OZ
1863 if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
1864 {
1865 log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m");
f83ce94d 1866 close(fd);
95616c82
OZ
1867 return;
1868 }
1869
f83ce94d
OZ
1870 nl_async_rx_buffer = xmalloc(NL_RX_SIZE);
1871
95616c82
OZ
1872 sk = nl_async_sk = sk_new(krt_pool);
1873 sk->type = SK_MAGIC;
1874 sk->rx_hook = nl_async_hook;
ccd2a3ed 1875 sk->err_hook = nl_async_err_hook;
95616c82 1876 sk->fd = fd;
05476c4d 1877 if (sk_open(sk) < 0)
95616c82 1878 bug("Netlink: sk_open failed");
95616c82
OZ
1879}
1880
9ddbfbdd 1881
95616c82
OZ
1882/*
1883 * Interface to the UNIX krt module
1884 */
1885
95616c82 1886void
9ddbfbdd
MM
1887krt_sys_io_init(void)
1888{
2feaa693 1889 nl_linpool = lp_new(krt_pool, 4080);
9ddbfbdd
MM
1890 HASH_INIT(nl_table_map, krt_pool, 6);
1891}
1892
1893int
c6964c30 1894krt_sys_start(struct krt_proto *p)
95616c82 1895{
29a64162 1896 struct krt_proto *old = HASH_FIND(nl_table_map, RTH, p->af, krt_table_id(p));
9ddbfbdd
MM
1897
1898 if (old)
1899 {
1900 log(L_ERR "%s: Kernel table %u already registered by %s",
1901 p->p.name, krt_table_id(p), old->p.name);
1902 return 0;
1903 }
1904
1905 HASH_INSERT2(nl_table_map, RTH, krt_pool, p);
c6964c30
OZ
1906
1907 nl_open();
1908 nl_open_async();
9ddbfbdd
MM
1909
1910 return 1;
95616c82
OZ
1911}
1912
1913void
9ddbfbdd 1914krt_sys_shutdown(struct krt_proto *p)
95616c82 1915{
9ddbfbdd 1916 HASH_REMOVE2(nl_table_map, RTH, krt_pool, p);
95616c82
OZ
1917}
1918
1919int
1920krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
1921{
4adcb9df 1922 return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric);
95616c82
OZ
1923}
1924
95616c82
OZ
1925void
1926krt_sys_init_config(struct krt_config *cf)
1927{
1928 cf->sys.table_id = RT_TABLE_MAIN;
4adcb9df 1929 cf->sys.metric = 0;
95616c82
OZ
1930}
1931
1932void
1933krt_sys_copy_config(struct krt_config *d, struct krt_config *s)
1934{
1935 d->sys.table_id = s->sys.table_id;
4adcb9df 1936 d->sys.metric = s->sys.metric;
95616c82
OZ
1937}
1938
9fdf9d29
OZ
1939static const char *krt_metrics_names[KRT_METRICS_MAX] = {
1940 NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss",
1941 "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack"
1942};
1943
1944static const char *krt_features_names[KRT_FEATURES_MAX] = {
1945 "ecn", NULL, NULL, "allfrag"
1946};
1947
1948int
1949krt_sys_get_attr(eattr *a, byte *buf, int buflen UNUSED)
1950{
1951 switch (a->id)
1952 {
1953 case EA_KRT_PREFSRC:
1954 bsprintf(buf, "prefsrc");
1955 return GA_NAME;
1956
1957 case EA_KRT_REALM:
1958 bsprintf(buf, "realm");
1959 return GA_NAME;
1960
6e75d0d2
OZ
1961 case EA_KRT_SCOPE:
1962 bsprintf(buf, "scope");
1963 return GA_NAME;
1964
9fdf9d29
OZ
1965 case EA_KRT_LOCK:
1966 buf += bsprintf(buf, "lock:");
1967 ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX);
1968 return GA_FULL;
1969
1970 case EA_KRT_FEATURES:
1971 buf += bsprintf(buf, "features:");
1972 ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX);
1973 return GA_FULL;
1974
1975 default:;
1976 int id = (int)EA_ID(a->id) - KRT_METRICS_OFFSET;
1977 if (id > 0 && id < KRT_METRICS_MAX)
1978 {
1979 bsprintf(buf, "%s", krt_metrics_names[id]);
1980 return GA_NAME;
1981 }
1982
1983 return GA_UNKNOWN;
1984 }
1985}
1986
95616c82
OZ
1987
1988
1989void
1990kif_sys_start(struct kif_proto *p UNUSED)
1991{
1992 nl_open();
1993 nl_open_async();
1994}
1995
1996void
1997kif_sys_shutdown(struct kif_proto *p UNUSED)
1998{
1999}