]> git.ipfire.org Git - thirdparty/bird.git/blame - sysdep/linux/netlink.c
CI: Remove obsolete CI targets
[thirdparty/bird.git] / sysdep / linux / netlink.c
CommitLineData
95616c82
OZ
1/*
2 * BIRD -- Linux Netlink Interface
3 *
4 * (c) 1999--2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
9#include <stdio.h>
f83ce94d 10#include <unistd.h>
95616c82
OZ
11#include <fcntl.h>
12#include <sys/socket.h>
13#include <sys/uio.h>
14#include <errno.h>
15
16#undef LOCAL_DEBUG
17
18#include "nest/bird.h"
19#include "nest/route.h"
20#include "nest/protocol.h"
21#include "nest/iface.h"
4e276a89 22#include "lib/alloca.h"
7152e5ef
JMM
23#include "sysdep/unix/unix.h"
24#include "sysdep/unix/krt.h"
95616c82
OZ
25#include "lib/socket.h"
26#include "lib/string.h"
9ddbfbdd 27#include "lib/hash.h"
95616c82
OZ
28#include "conf/conf.h"
29
176fc68a 30#include CONFIG_INCLUDE_NLSYS_H
8988264a 31
/* True when the address family stored in (p)->af is AF_INET */
#define krt_ipv4(p) ((p)->af == AF_INET)

/* NOTE(review): looks like a default ECMP limit; the consumer is outside this section -- confirm */
const int rt_default_ecmp = 16;
35
2feaa693
OZ
36struct nl_parse_state
37{
722daa95 38 struct krt_proto *proto;
2feaa693
OZ
39 struct linpool *pool;
40 int scan;
2feaa693 41
722daa95 42 u32 rta_flow;
2feaa693
OZ
43};
44
95616c82
OZ
45/*
46 * Synchronous Netlink interface
47 */
48
49struct nl_sock
50{
51 int fd;
52 u32 seq;
53 byte *rx_buffer; /* Receive buffer */
54 struct nlmsghdr *last_hdr; /* Recently received packet */
ae80a2de 55 uint last_size;
95616c82
OZ
56};
57
e818f164 58#define NL_RX_SIZE 32768
95616c82 59
2feaa693
OZ
60#define NL_OP_DELETE 0
61#define NL_OP_ADD (NLM_F_CREATE|NLM_F_EXCL)
62#define NL_OP_REPLACE (NLM_F_CREATE|NLM_F_REPLACE)
63#define NL_OP_APPEND (NLM_F_CREATE|NLM_F_APPEND)
64
65static linpool *nl_linpool;
66
95616c82
OZ
67static struct nl_sock nl_scan = {.fd = -1}; /* Netlink socket for synchronous scan */
68static struct nl_sock nl_req = {.fd = -1}; /* Netlink socket for requests */
69
70static void
71nl_open_sock(struct nl_sock *nl)
72{
73 if (nl->fd < 0)
74 {
75 nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
76 if (nl->fd < 0)
77 die("Unable to open rtnetlink socket: %m");
574b2324 78 nl->seq = (u32) (current_time() TO_S); /* Or perhaps random_u32() ? */
95616c82
OZ
79 nl->rx_buffer = xmalloc(NL_RX_SIZE);
80 nl->last_hdr = NULL;
81 nl->last_size = 0;
82 }
83}
84
534d0a4b 85static int
ef614f29 86nl_set_strict_dump(struct nl_sock *nl UNUSED, int strict UNUSED)
e818f164 87{
bbc33f6e 88#ifdef SOL_NETLINK
534d0a4b
OZ
89 return setsockopt(nl->fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &strict, sizeof(strict));
90#else
91 return -1;
bbc33f6e 92#endif
e818f164
OZ
93}
94
81ee6cda
OZ
95static void
96nl_set_rcvbuf(int fd, uint val)
97{
98 if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val)) < 0)
99 log(L_WARN "KRT: Cannot set netlink rx buffer size to %u: %m", val);
100}
101
102static uint
103nl_cfg_rx_buffer_size(struct config *cfg)
104{
105 uint bufsize = 0;
106
107 struct proto_config *pc;
108 WALK_LIST(pc, cfg->protos)
109 if ((pc->protocol == &proto_unix_kernel) && !pc->disabled)
110 bufsize = MAX(bufsize, ((struct krt_config *) pc)->sys.netlink_rx_buffer);
111
112 return bufsize;
113}
114
115
95616c82
OZ
116static void
117nl_open(void)
118{
534d0a4b
OZ
119 if ((nl_scan.fd >= 0) && (nl_req.fd >= 0))
120 return;
121
95616c82
OZ
122 nl_open_sock(&nl_scan);
123 nl_open_sock(&nl_req);
e818f164 124
534d0a4b
OZ
125 if (nl_set_strict_dump(&nl_scan, 1) < 0)
126 {
127 log(L_WARN "KRT: Netlink strict checking failed, will scan all tables at once");
128 krt_use_shared_scan();
129 }
95616c82
OZ
130}
131
132static void
133nl_send(struct nl_sock *nl, struct nlmsghdr *nh)
134{
135 struct sockaddr_nl sa;
136
137 memset(&sa, 0, sizeof(sa));
138 sa.nl_family = AF_NETLINK;
139 nh->nlmsg_pid = 0;
140 nh->nlmsg_seq = ++(nl->seq);
53401bef 141 nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len);
95616c82
OZ
142 if (sendto(nl->fd, nh, nh->nlmsg_len, 0, (struct sockaddr *)&sa, sizeof(sa)) < 0)
143 die("rtnetlink sendto: %m");
144 nl->last_hdr = NULL;
145}
146
147static void
e818f164 148nl_request_dump_link(void)
95616c82
OZ
149{
150 struct {
151 struct nlmsghdr nh;
e818f164 152 struct ifinfomsg ifi;
641172c6 153 } req = {
e818f164
OZ
154 .nh.nlmsg_type = RTM_GETLINK,
155 .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
641172c6 156 .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
e818f164
OZ
157 .nh.nlmsg_seq = ++(nl_scan.seq),
158 .ifi.ifi_family = AF_UNSPEC,
641172c6 159 };
e818f164
OZ
160
161 send(nl_scan.fd, &req, sizeof(req), 0);
162 nl_scan.last_hdr = NULL;
95616c82
OZ
163}
164
e818f164
OZ
165static void
166nl_request_dump_addr(int af)
167{
168 struct {
169 struct nlmsghdr nh;
170 struct ifaddrmsg ifa;
171 } req = {
172 .nh.nlmsg_type = RTM_GETADDR,
173 .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
174 .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
175 .nh.nlmsg_seq = ++(nl_scan.seq),
176 .ifa.ifa_family = af,
177 };
178
179 send(nl_scan.fd, &req, sizeof(req), 0);
180 nl_scan.last_hdr = NULL;
181}
182
183static void
534d0a4b 184nl_request_dump_route(int af, int table_id)
e818f164
OZ
185{
186 struct {
187 struct nlmsghdr nh;
188 struct rtmsg rtm;
534d0a4b
OZ
189 struct rtattr rta;
190 u32 table_id;
e818f164
OZ
191 } req = {
192 .nh.nlmsg_type = RTM_GETROUTE,
193 .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)),
194 .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
195 .nh.nlmsg_seq = ++(nl_scan.seq),
196 .rtm.rtm_family = af,
197 };
198
534d0a4b
OZ
199 if (table_id < 256)
200 req.rtm.rtm_table = table_id;
201 else
202 {
203 req.rta.rta_type = RTA_TABLE;
204 req.rta.rta_len = RTA_LENGTH(4);
205 req.table_id = table_id;
206 req.nh.nlmsg_len = NLMSG_ALIGN(req.nh.nlmsg_len) + req.rta.rta_len;
207 }
208
209 send(nl_scan.fd, &req, req.nh.nlmsg_len, 0);
e818f164
OZ
210 nl_scan.last_hdr = NULL;
211}
212
213
95616c82
OZ
214static struct nlmsghdr *
215nl_get_reply(struct nl_sock *nl)
216{
217 for(;;)
218 {
219 if (!nl->last_hdr)
220 {
221 struct iovec iov = { nl->rx_buffer, NL_RX_SIZE };
222 struct sockaddr_nl sa;
31e9e101
ST
223 struct msghdr m = {
224 .msg_name = &sa,
225 .msg_namelen = sizeof(sa),
226 .msg_iov = &iov,
227 .msg_iovlen = 1,
228 };
95616c82
OZ
229 int x = recvmsg(nl->fd, &m, 0);
230 if (x < 0)
231 die("nl_get_reply: %m");
232 if (sa.nl_pid) /* It isn't from the kernel */
233 {
234 DBG("Non-kernel packet\n");
235 continue;
236 }
237 nl->last_size = x;
238 nl->last_hdr = (void *) nl->rx_buffer;
239 if (m.msg_flags & MSG_TRUNC)
240 bug("nl_get_reply: got truncated reply which should be impossible");
241 }
242 if (NLMSG_OK(nl->last_hdr, nl->last_size))
243 {
244 struct nlmsghdr *h = nl->last_hdr;
245 nl->last_hdr = NLMSG_NEXT(h, nl->last_size);
246 if (h->nlmsg_seq != nl->seq)
247 {
248 log(L_WARN "nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)",
249 h->nlmsg_seq, nl->seq);
250 continue;
251 }
252 return h;
253 }
254 if (nl->last_size)
255 log(L_WARN "nl_get_reply: Found packet remnant of size %d", nl->last_size);
256 nl->last_hdr = NULL;
257 }
258}
259
1123e707 260static struct tbf rl_netlink_err = TBF_DEFAULT_LOG_LIMITS;
95616c82
OZ
261
262static int
2feaa693 263nl_error(struct nlmsghdr *h, int ignore_esrch)
95616c82
OZ
264{
265 struct nlmsgerr *e;
266 int ec;
267
268 if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr)))
269 {
270 log(L_WARN "Netlink: Truncated error message received");
271 return ENOBUFS;
272 }
273 e = (struct nlmsgerr *) NLMSG_DATA(h);
176fc68a 274 ec = netlink_error_to_os(e->error);
2feaa693 275 if (ec && !(ignore_esrch && (ec == ESRCH)))
95616c82
OZ
276 log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec));
277 return ec;
278}
279
280static struct nlmsghdr *
281nl_get_scan(void)
282{
283 struct nlmsghdr *h = nl_get_reply(&nl_scan);
284
285 if (h->nlmsg_type == NLMSG_DONE)
286 return NULL;
287 if (h->nlmsg_type == NLMSG_ERROR)
288 {
2feaa693 289 nl_error(h, 0);
95616c82
OZ
290 return NULL;
291 }
292 return h;
293}
294
295static int
2feaa693 296nl_exchange(struct nlmsghdr *pkt, int ignore_esrch)
95616c82
OZ
297{
298 struct nlmsghdr *h;
299
300 nl_send(&nl_req, pkt);
301 for(;;)
302 {
303 h = nl_get_reply(&nl_req);
304 if (h->nlmsg_type == NLMSG_ERROR)
305 break;
306 log(L_WARN "nl_exchange: Unexpected reply received");
307 }
2feaa693 308 return nl_error(h, ignore_esrch) ? -1 : 0;
95616c82
OZ
309}
310
311/*
312 * Netlink attributes
313 */
314
315static int nl_attr_len;
316
317static void *
318nl_checkin(struct nlmsghdr *h, int lsize)
319{
320 nl_attr_len = h->nlmsg_len - NLMSG_LENGTH(lsize);
321 if (nl_attr_len < 0)
322 {
323 log(L_ERR "nl_checkin: underrun by %d bytes", -nl_attr_len);
324 return NULL;
325 }
326 return NLMSG_DATA(h);
327}
328
ad276157
JMM
329struct nl_want_attrs {
330 u8 defined:1;
331 u8 checksize:1;
332 u8 size;
333};
334
335
336#define BIRD_IFLA_MAX (IFLA_WIRELESS+1)
337
338static struct nl_want_attrs ifla_attr_want[BIRD_IFLA_MAX] = {
339 [IFLA_IFNAME] = { 1, 0, 0 },
340 [IFLA_MTU] = { 1, 1, sizeof(u32) },
943478b0 341 [IFLA_MASTER] = { 1, 1, sizeof(u32) },
ad276157
JMM
342 [IFLA_WIRELESS] = { 1, 0, 0 },
343};
344
29a64162 345
e37d2e3e 346#define BIRD_IFA_MAX (IFA_FLAGS+1)
ad276157 347
ad276157
JMM
348static struct nl_want_attrs ifa_attr_want4[BIRD_IFA_MAX] = {
349 [IFA_ADDRESS] = { 1, 1, sizeof(ip4_addr) },
350 [IFA_LOCAL] = { 1, 1, sizeof(ip4_addr) },
351 [IFA_BROADCAST] = { 1, 1, sizeof(ip4_addr) },
62e64905 352 [IFA_FLAGS] = { 1, 1, sizeof(u32) },
ad276157 353};
29a64162 354
ad276157
JMM
355static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = {
356 [IFA_ADDRESS] = { 1, 1, sizeof(ip6_addr) },
357 [IFA_LOCAL] = { 1, 1, sizeof(ip6_addr) },
e37d2e3e 358 [IFA_FLAGS] = { 1, 1, sizeof(u32) },
ad276157 359};
29a64162 360
ad276157 361
d14f8c3c 362#define BIRD_RTA_MAX (RTA_ENCAP+1)
ad276157 363
4e276a89 364static struct nl_want_attrs nexthop_attr_want4[BIRD_RTA_MAX] = {
ad276157 365 [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
53401bef 366 [RTA_VIA] = { 1, 0, 0 },
21f9acd2 367 [RTA_FLOW] = { 1, 1, sizeof(u32) },
d14f8c3c
JMM
368 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
369 [RTA_ENCAP] = { 1, 0, 0 },
370};
371
4ff15a75 372static struct nl_want_attrs nexthop_attr_want6[BIRD_RTA_MAX] = {
98bb80a2 373 [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) },
53401bef 374 [RTA_VIA] = { 1, 0, 0 },
21f9acd2 375 [RTA_FLOW] = { 1, 1, sizeof(u32) },
4ff15a75
OZ
376 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
377 [RTA_ENCAP] = { 1, 0, 0 },
378};
379
6b0f5f68 380#ifdef HAVE_MPLS_KERNEL
f1b5f179
KY
381static struct nl_want_attrs nexthop_attr_want_mpls[BIRD_RTA_MAX] = {
382 [RTA_VIA] = { 1, 0, 0 },
383 [RTA_NEWDST] = { 1, 0, 0 },
384};
385
d14f8c3c
JMM
386static struct nl_want_attrs encap_mpls_want[BIRD_RTA_MAX] = {
387 [RTA_DST] = { 1, 0, 0 },
ad276157 388};
6b0f5f68 389#endif
ad276157 390
ad276157
JMM
391static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = {
392 [RTA_DST] = { 1, 1, sizeof(ip4_addr) },
393 [RTA_OIF] = { 1, 1, sizeof(u32) },
394 [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
395 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
396 [RTA_PREFSRC] = { 1, 1, sizeof(ip4_addr) },
397 [RTA_METRICS] = { 1, 0, 0 },
398 [RTA_MULTIPATH] = { 1, 0, 0 },
399 [RTA_FLOW] = { 1, 1, sizeof(u32) },
400 [RTA_TABLE] = { 1, 1, sizeof(u32) },
53401bef 401 [RTA_VIA] = { 1, 0, 0 },
d14f8c3c
JMM
402 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
403 [RTA_ENCAP] = { 1, 0, 0 },
ad276157 404};
29a64162 405
ad276157
JMM
406static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = {
407 [RTA_DST] = { 1, 1, sizeof(ip6_addr) },
be17805c 408 [RTA_SRC] = { 1, 1, sizeof(ip6_addr) },
ad276157
JMM
409 [RTA_IIF] = { 1, 1, sizeof(u32) },
410 [RTA_OIF] = { 1, 1, sizeof(u32) },
411 [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) },
412 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
413 [RTA_PREFSRC] = { 1, 1, sizeof(ip6_addr) },
414 [RTA_METRICS] = { 1, 0, 0 },
98bb80a2 415 [RTA_MULTIPATH] = { 1, 0, 0 },
ad276157
JMM
416 [RTA_FLOW] = { 1, 1, sizeof(u32) },
417 [RTA_TABLE] = { 1, 1, sizeof(u32) },
53401bef 418 [RTA_VIA] = { 1, 0, 0 },
d14f8c3c
JMM
419 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
420 [RTA_ENCAP] = { 1, 0, 0 },
421};
422
6b0f5f68 423#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
424static struct nl_want_attrs rtm_attr_want_mpls[BIRD_RTA_MAX] = {
425 [RTA_DST] = { 1, 1, sizeof(u32) },
426 [RTA_IIF] = { 1, 1, sizeof(u32) },
427 [RTA_OIF] = { 1, 1, sizeof(u32) },
428 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
429 [RTA_METRICS] = { 1, 0, 0 },
f1b5f179 430 [RTA_MULTIPATH] = { 1, 0, 0 },
d14f8c3c
JMM
431 [RTA_FLOW] = { 1, 1, sizeof(u32) },
432 [RTA_TABLE] = { 1, 1, sizeof(u32) },
433 [RTA_VIA] = { 1, 0, 0 },
434 [RTA_NEWDST] = { 1, 0, 0 },
ad276157 435};
6b0f5f68 436#endif
ad276157
JMM
437
438
95616c82 439static int
ad276157 440nl_parse_attrs(struct rtattr *a, struct nl_want_attrs *want, struct rtattr **k, int ksize)
95616c82
OZ
441{
442 int max = ksize / sizeof(struct rtattr *);
443 bzero(k, ksize);
ad276157
JMM
444
445 for ( ; RTA_OK(a, nl_attr_len); a = RTA_NEXT(a, nl_attr_len))
95616c82 446 {
ad276157
JMM
447 if ((a->rta_type >= max) || !want[a->rta_type].defined)
448 continue;
449
450 if (want[a->rta_type].checksize && (RTA_PAYLOAD(a) != want[a->rta_type].size))
451 {
9b136840 452 log(L_ERR "nl_parse_attrs: Malformed attribute received");
ad276157
JMM
453 return 0;
454 }
455
456 k[a->rta_type] = a;
95616c82 457 }
ad276157 458
95616c82
OZ
459 if (nl_attr_len)
460 {
461 log(L_ERR "nl_parse_attrs: remnant of size %d", nl_attr_len);
462 return 0;
463 }
ad276157
JMM
464
465 return 1;
95616c82
OZ
466}
467
d14f8c3c
JMM
468static inline u16 rta_get_u16(struct rtattr *a)
469{ return *(u16 *) RTA_DATA(a); }
470
fce764f9 471static inline u32 rta_get_u32(struct rtattr *a)
acb04cfd
OZ
472{ return *(u32 *) RTA_DATA(a); }
473
474static inline ip4_addr rta_get_ip4(struct rtattr *a)
475{ return ip4_ntoh(*(ip4_addr *) RTA_DATA(a)); }
476
477static inline ip6_addr rta_get_ip6(struct rtattr *a)
478{ return ip6_ntoh(*(ip6_addr *) RTA_DATA(a)); }
479
9b136840
JMM
480static inline ip_addr rta_get_ipa(struct rtattr *a)
481{
482 if (RTA_PAYLOAD(a) == sizeof(ip4_addr))
483 return ipa_from_ip4(rta_get_ip4(a));
484 else
485 return ipa_from_ip6(rta_get_ip6(a));
486}
acb04cfd 487
6b0f5f68 488#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
489static inline ip_addr rta_get_via(struct rtattr *a)
490{
491 struct rtvia *v = RTA_DATA(a);
492 switch(v->rtvia_family) {
493 case AF_INET: return ipa_from_ip4(ip4_ntoh(*(ip4_addr *) v->rtvia_addr));
494 case AF_INET6: return ipa_from_ip6(ip6_ntoh(*(ip6_addr *) v->rtvia_addr));
495 }
496 return IPA_NONE;
497}
498
499static u32 rta_mpls_stack[MPLS_MAX_LABEL_STACK];
500static inline int rta_get_mpls(struct rtattr *a, u32 *stack)
501{
2eaf65ec
OZ
502 if (!a)
503 return 0;
504
d14f8c3c
JMM
505 if (RTA_PAYLOAD(a) % 4)
506 log(L_WARN "KRT: Strange length of received MPLS stack: %u", RTA_PAYLOAD(a));
507
2eaf65ec
OZ
508 int labels = mpls_get(RTA_DATA(a), RTA_PAYLOAD(a) & ~0x3, stack);
509
510 if (labels < 0)
511 {
512 log(L_WARN "KRT: Too long MPLS stack received, ignoring");
513 labels = 0;
514 }
515
516 return labels;
d14f8c3c 517}
6b0f5f68 518#endif
d14f8c3c 519
9fdf9d29
OZ
520struct rtattr *
521nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint dlen)
95616c82 522{
9fdf9d29
OZ
523 uint pos = NLMSG_ALIGN(h->nlmsg_len);
524 uint len = RTA_LENGTH(dlen);
95616c82
OZ
525
526 if (pos + len > bufsize)
527 bug("nl_add_attr: packet buffer overflow");
9fdf9d29
OZ
528
529 struct rtattr *a = (struct rtattr *)((char *)h + pos);
95616c82
OZ
530 a->rta_type = code;
531 a->rta_len = len;
532 h->nlmsg_len = pos + len;
9fdf9d29
OZ
533
534 if (dlen > 0)
535 memcpy(RTA_DATA(a), data, dlen);
536
537 return a;
95616c82
OZ
538}
539
d14f8c3c
JMM
540static inline struct rtattr *
541nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code)
542{
543 return nl_add_attr(h, bufsize, code, NULL, 0);
544}
545
546static inline void
547nl_close_attr(struct nlmsghdr *h, struct rtattr *a)
548{
549 a->rta_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)a;
550}
551
552static inline void
553nl_add_attr_u16(struct nlmsghdr *h, uint bufsize, int code, u16 data)
554{
555 nl_add_attr(h, bufsize, code, &data, 2);
556}
557
95616c82 558static inline void
29a64162 559nl_add_attr_u32(struct nlmsghdr *h, uint bufsize, int code, u32 data)
95616c82
OZ
560{
561 nl_add_attr(h, bufsize, code, &data, 4);
562}
563
564static inline void
29a64162 565nl_add_attr_ip4(struct nlmsghdr *h, uint bufsize, int code, ip4_addr ip4)
95616c82 566{
29a64162
OZ
567 ip4 = ip4_hton(ip4);
568 nl_add_attr(h, bufsize, code, &ip4, sizeof(ip4));
569}
570
571static inline void
572nl_add_attr_ip6(struct nlmsghdr *h, uint bufsize, int code, ip6_addr ip6)
573{
574 ip6 = ip6_hton(ip6);
575 nl_add_attr(h, bufsize, code, &ip6, sizeof(ip6));
576}
577
578static inline void
579nl_add_attr_ipa(struct nlmsghdr *h, uint bufsize, int code, ip_addr ipa)
580{
581 if (ipa_is_ip4(ipa))
582 nl_add_attr_ip4(h, bufsize, code, ipa_to_ip4(ipa));
9b136840 583 else
29a64162 584 nl_add_attr_ip6(h, bufsize, code, ipa_to_ip6(ipa));
95616c82
OZ
585}
586
6b0f5f68 587#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
588static inline void
589nl_add_attr_mpls(struct nlmsghdr *h, uint bufsize, int code, int len, u32 *stack)
9fdf9d29 590{
d14f8c3c
JMM
591 char buf[len*4];
592 mpls_put(buf, len, stack);
593 nl_add_attr(h, bufsize, code, buf, len*4);
9fdf9d29 594}
95616c82
OZ
595
596static inline void
d14f8c3c 597nl_add_attr_mpls_encap(struct nlmsghdr *h, uint bufsize, int len, u32 *stack)
95616c82 598{
d14f8c3c
JMM
599 nl_add_attr_u16(h, bufsize, RTA_ENCAP_TYPE, LWTUNNEL_ENCAP_MPLS);
600
601 struct rtattr *nest = nl_open_attr(h, bufsize, RTA_ENCAP);
602 nl_add_attr_mpls(h, bufsize, RTA_DST, len, stack);
603 nl_close_attr(h, nest);
604}
605
606static inline void
607nl_add_attr_via(struct nlmsghdr *h, uint bufsize, ip_addr ipa)
608{
66acbc8d 609 struct rtvia *via = alloca(sizeof(struct rtvia) + 16);
d14f8c3c 610
62e64905
OZ
611 if (ipa_is_ip4(ipa))
612 {
d14f8c3c 613 via->rtvia_family = AF_INET;
62e64905 614 put_ip4(via->rtvia_addr, ipa_to_ip4(ipa));
66acbc8d 615 nl_add_attr(h, bufsize, RTA_VIA, via, sizeof(struct rtvia) + 4);
62e64905
OZ
616 }
617 else
618 {
d14f8c3c 619 via->rtvia_family = AF_INET6;
62e64905 620 put_ip6(via->rtvia_addr, ipa_to_ip6(ipa));
66acbc8d 621 nl_add_attr(h, bufsize, RTA_VIA, via, sizeof(struct rtvia) + 16);
d14f8c3c 622 }
95616c82 623}
6b0f5f68 624#endif
95616c82 625
9fdf9d29
OZ
626static inline struct rtnexthop *
627nl_open_nexthop(struct nlmsghdr *h, uint bufsize)
628{
629 uint pos = NLMSG_ALIGN(h->nlmsg_len);
630 uint len = RTNH_LENGTH(0);
631
632 if (pos + len > bufsize)
633 bug("nl_open_nexthop: packet buffer overflow");
634
635 h->nlmsg_len = pos + len;
636
637 return (void *)h + pos;
638}
639
640static inline void
641nl_close_nexthop(struct nlmsghdr *h, struct rtnexthop *nh)
642{
643 nh->rtnh_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)nh;
644}
95616c82 645
d14f8c3c 646static inline void
6b0f5f68 647nl_add_nexthop(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af UNUSED)
d14f8c3c 648{
6b0f5f68 649#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
650 if (nh->labels > 0)
651 if (af == AF_MPLS)
652 nl_add_attr_mpls(h, bufsize, RTA_NEWDST, nh->labels, nh->label);
653 else
654 nl_add_attr_mpls_encap(h, bufsize, nh->labels, nh->label);
655
656 if (ipa_nonzero(nh->gw))
53401bef
OZ
657 {
658 if (af == (ipa_is_ip4(nh->gw) ? AF_INET : AF_INET6))
d14f8c3c 659 nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
53401bef
OZ
660 else
661 nl_add_attr_via(h, bufsize, nh->gw);
662 }
6b0f5f68
MJM
663#else
664
665 if (ipa_nonzero(nh->gw))
666 nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
667#endif
d14f8c3c
JMM
668}
669
95616c82 670static void
21f9acd2 671nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af, ea_list *eattrs)
95616c82 672{
9fdf9d29 673 struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH);
21f9acd2 674 eattr *flow = ea_find(eattrs, EA_KRT_REALM);
9fdf9d29 675
95616c82 676 for (; nh; nh = nh->next)
9fdf9d29
OZ
677 {
678 struct rtnexthop *rtnh = nl_open_nexthop(h, bufsize);
95616c82 679
9fdf9d29
OZ
680 rtnh->rtnh_flags = 0;
681 rtnh->rtnh_hops = nh->weight;
682 rtnh->rtnh_ifindex = nh->iface->index;
95616c82 683
d14f8c3c 684 nl_add_nexthop(h, bufsize, nh, af);
95616c82 685
a1f5e514
OZ
686 if (nh->flags & RNF_ONLINK)
687 rtnh->rtnh_flags |= RTNH_F_ONLINK;
688
21f9acd2
OZ
689 /* Our KRT_REALM is per-route, but kernel RTA_FLOW is per-nexthop.
690 Therefore, we need to attach the same attribute to each nexthop. */
691 if (flow)
692 nl_add_attr_u32(h, bufsize, RTA_FLOW, flow->u.data);
693
9fdf9d29
OZ
694 nl_close_nexthop(h, rtnh);
695 }
696
697 nl_close_attr(h, a);
698}
95616c82 699
4e276a89 700static struct nexthop *
f5c8fb5f 701nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, const net_addr *n, struct rtattr *ra, int af, int krt_src)
95616c82 702{
ad276157 703 struct rtattr *a[BIRD_RTA_MAX];
95616c82 704 struct rtnexthop *nh = RTA_DATA(ra);
4e276a89 705 struct nexthop *rv, *first, **last;
3e236955 706 unsigned len = RTA_PAYLOAD(ra);
95616c82
OZ
707
708 first = NULL;
709 last = &first;
95616c82
OZ
710
711 while (len)
712 {
713 /* Use RTNH_OK(nh,len) ?? */
714 if ((len < sizeof(*nh)) || (len < nh->rtnh_len))
77d032c7 715 goto err;
95616c82 716
f5c8fb5f 717 if ((nh->rtnh_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD))
df83f626
OZ
718 goto next;
719
3e792350 720 *last = rv = lp_allocz(s->pool, NEXTHOP_MAX_SIZE);
95616c82
OZ
721 last = &(rv->next);
722
723 rv->weight = nh->rtnh_hops;
724 rv->iface = if_find_by_index(nh->rtnh_ifindex);
725 if (!rv->iface)
77d032c7
OZ
726 {
727 log(L_ERR "KRT: Received route %N with unknown ifindex %u", n, nh->rtnh_ifindex);
728 return NULL;
729 }
95616c82
OZ
730
731 /* Nonexistent RTNH_PAYLOAD ?? */
732 nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0);
98bb80a2
OZ
733 switch (af)
734 {
98bb80a2 735 case AF_INET:
4ff15a75 736 if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want4, a, sizeof(a)))
77d032c7 737 goto err;
98bb80a2 738 break;
4ff15a75 739
98bb80a2 740 case AF_INET6:
4ff15a75 741 if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want6, a, sizeof(a)))
77d032c7 742 goto err;
98bb80a2 743 break;
4ff15a75 744
f1b5f179
KY
745#ifdef HAVE_MPLS_KERNEL
746 case AF_MPLS:
747 if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want_mpls, a, sizeof(a)))
77d032c7 748 goto err;
f1b5f179
KY
749
750 if (a[RTA_NEWDST])
751 rv->labels = rta_get_mpls(a[RTA_NEWDST], rv->label);
752
753 break;
754#endif
755
98bb80a2 756 default:
77d032c7 757 goto err;
98bb80a2
OZ
758 }
759
95616c82 760 if (a[RTA_GATEWAY])
53401bef 761 rv->gw = rta_get_ipa(a[RTA_GATEWAY]);
95616c82 762
21f9acd2
OZ
763 if (a[RTA_FLOW])
764 s->rta_flow = rta_get_u32(a[RTA_FLOW]);
765
53401bef
OZ
766#ifdef HAVE_MPLS_KERNEL
767 if (a[RTA_VIA])
768 rv->gw = rta_get_via(a[RTA_VIA]);
769#endif
770
32425297
OZ
771 if (nh->rtnh_flags & RTNH_F_ONLINK)
772 rv->flags |= RNF_ONLINK;
773
53401bef
OZ
774 if (ipa_nonzero(rv->gw))
775 {
23c212e7 776 neighbor *nbr;
586c1800
OZ
777 nbr = neigh_find(&p->p, rv->gw, rv->iface,
778 (rv->flags & RNF_ONLINK) ? NEF_ONLINK : 0);
23c212e7 779 if (!nbr || (nbr->scope == SCOPE_HOST))
77d032c7
OZ
780 {
781 log(L_ERR "KRT: Received route %N with strange next-hop %I", n, rv->gw);
782 return NULL;
783 }
95616c82 784 }
62e64905 785
6b0f5f68 786#ifdef HAVE_MPLS_KERNEL
2eaf65ec 787 if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE])
6b0f5f68 788 {
77d032c7
OZ
789 if (rta_get_u16(a[RTA_ENCAP_TYPE]) != LWTUNNEL_ENCAP_MPLS)
790 {
791 log(L_WARN "KRT: Received route %N with unknown encapsulation method %d",
792 n, rta_get_u16(a[RTA_ENCAP_TYPE]));
6b0f5f68 793 return NULL;
d14f8c3c
JMM
794 }
795
6b0f5f68
MJM
796 struct rtattr *enca[BIRD_RTA_MAX];
797 nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
798 nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
799 rv->labels = rta_get_mpls(enca[RTA_DST], rv->label);
6b0f5f68
MJM
800 }
801#endif
802
df83f626 803 next:
95616c82
OZ
804 len -= NLMSG_ALIGN(nh->rtnh_len);
805 nh = RTNH_NEXT(nh);
806 }
807
59d3a361
OZ
808 /* Ensure nexthops are sorted to satisfy nest invariant */
809 if (!nexthop_is_sorted(first))
810 first = nexthop_sort(first);
811
95616c82 812 return first;
77d032c7
OZ
813
814err:
815 log(L_ERR "KRT: Received strange multipath route %N", n);
816 return NULL;
95616c82
OZ
817}
818
9fdf9d29
OZ
819static void
820nl_add_metrics(struct nlmsghdr *h, uint bufsize, u32 *metrics, int max)
821{
822 struct rtattr *a = nl_open_attr(h, bufsize, RTA_METRICS);
823 int t;
824
825 for (t = 1; t < max; t++)
826 if (metrics[0] & (1 << t))
827 nl_add_attr_u32(h, bufsize, t, metrics[t]);
828
829 nl_close_attr(h, a);
830}
831
832static int
833nl_parse_metrics(struct rtattr *hdr, u32 *metrics, int max)
834{
835 struct rtattr *a = RTA_DATA(hdr);
836 int len = RTA_PAYLOAD(hdr);
837
838 metrics[0] = 0;
839 for (; RTA_OK(a, len); a = RTA_NEXT(a, len))
840 {
841 if (a->rta_type == RTA_UNSPEC)
842 continue;
843
844 if (a->rta_type >= max)
845 continue;
846
847 if (RTA_PAYLOAD(a) != 4)
848 return -1;
849
850 metrics[0] |= 1 << a->rta_type;
acb04cfd 851 metrics[a->rta_type] = rta_get_u32(a);
9fdf9d29
OZ
852 }
853
854 if (len > 0)
855 return -1;
856
857 return 0;
858}
859
95616c82
OZ
860
861/*
862 * Scanning of interfaces
863 */
864
865static void
866nl_parse_link(struct nlmsghdr *h, int scan)
867{
868 struct ifinfomsg *i;
ad276157 869 struct rtattr *a[BIRD_IFLA_MAX];
95616c82
OZ
870 int new = h->nlmsg_type == RTM_NEWLINK;
871 struct iface f = {};
872 struct iface *ifi;
873 char *name;
943478b0 874 u32 mtu, master = 0;
ae80a2de 875 uint fl;
95616c82 876
ad276157 877 if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFLA_RTA(i), ifla_attr_want, a, sizeof(a)))
95616c82 878 return;
ad276157 879 if (!a[IFLA_IFNAME] || (RTA_PAYLOAD(a[IFLA_IFNAME]) < 2) || !a[IFLA_MTU])
95616c82 880 {
ad276157
JMM
881 /*
882 * IFLA_IFNAME and IFLA_MTU are required, in fact, but there may also come
883 * a message with IFLA_WIRELESS set, where (e.g.) no IFLA_IFNAME exists.
884 * We simply ignore all such messages with IFLA_WIRELESS without notice.
885 */
886
887 if (a[IFLA_WIRELESS])
888 return;
889
890 log(L_ERR "KIF: Malformed message received");
95616c82
OZ
891 return;
892 }
ad276157 893
95616c82 894 name = RTA_DATA(a[IFLA_IFNAME]);
acb04cfd 895 mtu = rta_get_u32(a[IFLA_MTU]);
95616c82 896
943478b0
OZ
897 if (a[IFLA_MASTER])
898 master = rta_get_u32(a[IFLA_MASTER]);
899
95616c82
OZ
900 ifi = if_find_by_index(i->ifi_index);
901 if (!new)
902 {
903 DBG("KIF: IF%d(%s) goes down\n", i->ifi_index, name);
904 if (!ifi)
905 return;
906
907 if_delete(ifi);
908 }
909 else
910 {
911 DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i->ifi_index, name, mtu, i->ifi_flags);
912 if (ifi && strncmp(ifi->name, name, sizeof(ifi->name)-1))
913 if_delete(ifi);
914
915 strncpy(f.name, name, sizeof(f.name)-1);
916 f.index = i->ifi_index;
917 f.mtu = mtu;
918
943478b0
OZ
919 f.master_index = master;
920 f.master = if_find_by_index(master);
921
95616c82
OZ
922 fl = i->ifi_flags;
923 if (fl & IFF_UP)
924 f.flags |= IF_ADMIN_UP;
925 if (fl & IFF_LOWER_UP)
926 f.flags |= IF_LINK_UP;
927 if (fl & IFF_LOOPBACK) /* Loopback */
928 f.flags |= IF_MULTIACCESS | IF_LOOPBACK | IF_IGNORE;
929 else if (fl & IFF_POINTOPOINT) /* PtP */
930 f.flags |= IF_MULTICAST;
931 else if (fl & IFF_BROADCAST) /* Broadcast */
932 f.flags |= IF_MULTIACCESS | IF_BROADCAST | IF_MULTICAST;
933 else
934 f.flags |= IF_MULTIACCESS; /* NBMA */
3216eb03 935
16a3254c
OZ
936 if (fl & IFF_MULTICAST)
937 f.flags |= IF_MULTICAST;
938
3216eb03
OZ
939 ifi = if_update(&f);
940
941 if (!scan)
942 if_end_partial_update(ifi);
95616c82
OZ
943 }
944}
945
946static void
9b136840 947nl_parse_addr4(struct ifaddrmsg *i, int scan, int new)
95616c82 948{
ad276157 949 struct rtattr *a[BIRD_IFA_MAX];
95616c82 950 struct iface *ifi;
e37d2e3e 951 u32 ifa_flags;
95616c82
OZ
952 int scope;
953
9b136840 954 if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a)))
95616c82 955 return;
ad276157 956
9b136840 957 if (!a[IFA_LOCAL])
ad276157 958 {
9b136840
JMM
959 log(L_ERR "KIF: Malformed message received (missing IFA_LOCAL)");
960 return;
ad276157 961 }
ad276157 962 if (!a[IFA_ADDRESS])
95616c82 963 {
ad276157 964 log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
95616c82
OZ
965 return;
966 }
967
968 ifi = if_find_by_index(i->ifa_index);
969 if (!ifi)
970 {
971 log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
972 return;
973 }
974
e37d2e3e
OZ
975 if (a[IFA_FLAGS])
976 ifa_flags = rta_get_u32(a[IFA_FLAGS]);
977 else
978 ifa_flags = i->ifa_flags;
979
9b136840 980 struct ifa ifa;
95616c82
OZ
981 bzero(&ifa, sizeof(ifa));
982 ifa.iface = ifi;
cc5b93f7 983 if (ifa_flags & IFA_F_SECONDARY)
95616c82
OZ
984 ifa.flags |= IA_SECONDARY;
985
9b136840
JMM
986 ifa.ip = rta_get_ipa(a[IFA_LOCAL]);
987
d7661fbe 988 if (i->ifa_prefixlen > IP4_MAX_PREFIX_LENGTH)
95616c82
OZ
989 {
990 log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
991 new = 0;
992 }
d7661fbe 993 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH)
95616c82 994 {
9b136840
JMM
995 ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
996 net_fill_ip4(&ifa.prefix, rta_get_ip4(a[IFA_ADDRESS]), i->ifa_prefixlen);
95616c82
OZ
997
998 /* It is either a host address or a peer address */
9b136840 999 if (ipa_equal(ifa.ip, ifa.brd))
95616c82
OZ
1000 ifa.flags |= IA_HOST;
1001 else
1002 {
1003 ifa.flags |= IA_PEER;
9b136840 1004 ifa.opposite = ifa.brd;
95616c82
OZ
1005 }
1006 }
1007 else
1008 {
9b136840
JMM
1009 net_fill_ip4(&ifa.prefix, ipa_to_ip4(ifa.ip), i->ifa_prefixlen);
1010 net_normalize(&ifa.prefix);
1011
d7661fbe 1012 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 1)
95616c82
OZ
1013 ifa.opposite = ipa_opposite_m1(ifa.ip);
1014
d7661fbe 1015 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 2)
95616c82
OZ
1016 ifa.opposite = ipa_opposite_m2(ifa.ip);
1017
e2630a49
OZ
1018 if (ifi->flags & IF_BROADCAST)
1019 {
1020 /* If kernel offers us a broadcast address, we trust it */
1021 if (a[IFA_BROADCAST])
1022 ifa.brd = ipa_from_ip4(rta_get_ip4(a[IFA_BROADCAST]));
1023 /* Otherwise we create one (except for /31) */
1024 else if (i->ifa_prefixlen < (IP4_MAX_PREFIX_LENGTH - 1))
1025 ifa.brd = ipa_from_ip4(ip4_or(ipa_to_ip4(ifa.ip),
1026 ip4_not(ip4_mkmask(i->ifa_prefixlen))));
9b136840
JMM
1027 }
1028 }
1029
1030 scope = ipa_classify(ifa.ip);
1031 if (scope < 0)
1032 {
1033 log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
1034 return;
1035 }
1036 ifa.scope = scope & IADDR_SCOPE_MASK;
1037
1038 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
1039 ifi->index, ifi->name,
1040 new ? "added" : "removed",
4659b2ae 1041 ifa.ip, ifa.flags, &ifa.prefix, ifa.brd, ifa.opposite);
9b136840
JMM
1042
1043 if (new)
1044 ifa_update(&ifa);
1045 else
1046 ifa_delete(&ifa);
1047
1048 if (!scan)
1049 if_end_partial_update(ifi);
1050}
1051
1052static void
1053nl_parse_addr6(struct ifaddrmsg *i, int scan, int new)
1054{
1055 struct rtattr *a[BIRD_IFA_MAX];
1056 struct iface *ifi;
cc5b93f7 1057 u32 ifa_flags;
9b136840
JMM
1058 int scope;
1059
1060 if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a)))
1061 return;
1062
1063 if (!a[IFA_ADDRESS])
1064 {
1065 log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
1066 return;
1067 }
1068
1069 ifi = if_find_by_index(i->ifa_index);
1070 if (!ifi)
1071 {
1072 log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
1073 return;
1074 }
1075
cc5b93f7
OZ
1076 if (a[IFA_FLAGS])
1077 ifa_flags = rta_get_u32(a[IFA_FLAGS]);
1078 else
1079 ifa_flags = i->ifa_flags;
1080
9b136840
JMM
1081 struct ifa ifa;
1082 bzero(&ifa, sizeof(ifa));
1083 ifa.iface = ifi;
e37d2e3e 1084 if (ifa_flags & IFA_F_SECONDARY)
9b136840
JMM
1085 ifa.flags |= IA_SECONDARY;
1086
e37d2e3e
OZ
1087 /* Ignore tentative addresses silently */
1088 if (ifa_flags & IFA_F_TENTATIVE)
1089 return;
9b136840 1090
95616c82 1091 /* IFA_LOCAL can be unset for IPv6 interfaces */
9b136840
JMM
1092 ifa.ip = rta_get_ipa(a[IFA_LOCAL] ? : a[IFA_ADDRESS]);
1093
d7661fbe 1094 if (i->ifa_prefixlen > IP6_MAX_PREFIX_LENGTH)
9b136840
JMM
1095 {
1096 log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
1097 new = 0;
1098 }
d7661fbe 1099 if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH)
9b136840
JMM
1100 {
1101 ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
1102 net_fill_ip6(&ifa.prefix, rta_get_ip6(a[IFA_ADDRESS]), i->ifa_prefixlen);
1103
1104 /* It is either a host address or a peer address */
1105 if (ipa_equal(ifa.ip, ifa.brd))
1106 ifa.flags |= IA_HOST;
1107 else
1108 {
1109 ifa.flags |= IA_PEER;
1110 ifa.opposite = ifa.brd;
95616c82 1111 }
9b136840
JMM
1112 }
1113 else
1114 {
1115 net_fill_ip6(&ifa.prefix, ipa_to_ip6(ifa.ip), i->ifa_prefixlen);
1116 net_normalize(&ifa.prefix);
1117
d7661fbe 1118 if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH - 1)
9b136840 1119 ifa.opposite = ipa_opposite_m1(ifa.ip);
95616c82
OZ
1120 }
1121
1122 scope = ipa_classify(ifa.ip);
1123 if (scope < 0)
1124 {
1125 log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
1126 return;
1127 }
1128 ifa.scope = scope & IADDR_SCOPE_MASK;
1129
9b136840 1130 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
95616c82
OZ
1131 ifi->index, ifi->name,
1132 new ? "added" : "removed",
4659b2ae 1133 ifa.ip, ifa.flags, &ifa.prefix, ifa.brd, ifa.opposite);
3216eb03 1134
95616c82
OZ
1135 if (new)
1136 ifa_update(&ifa);
1137 else
1138 ifa_delete(&ifa);
3216eb03
OZ
1139
1140 if (!scan)
1141 if_end_partial_update(ifi);
95616c82
OZ
1142}
1143
9b136840
JMM
1144static void
1145nl_parse_addr(struct nlmsghdr *h, int scan)
1146{
1147 struct ifaddrmsg *i;
1148
1149 if (!(i = nl_checkin(h, sizeof(*i))))
1150 return;
1151
1152 int new = (h->nlmsg_type == RTM_NEWADDR);
1153
1154 switch (i->ifa_family)
1155 {
9b136840
JMM
1156 case AF_INET:
1157 return nl_parse_addr4(i, scan, new);
29a64162 1158
9b136840
JMM
1159 case AF_INET6:
1160 return nl_parse_addr6(i, scan, new);
9b136840
JMM
1161 }
1162}
1163
95616c82
OZ
1164void
1165kif_do_scan(struct kif_proto *p UNUSED)
1166{
1167 struct nlmsghdr *h;
1168
1169 if_start_update();
1170
e818f164 1171 nl_request_dump_link();
95616c82
OZ
1172 while (h = nl_get_scan())
1173 if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)
1174 nl_parse_link(h, 1);
1175 else
1176 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
29a64162 1177
943478b0
OZ
1178 /* Re-resolve master interface for slaves */
1179 struct iface *i;
1180 WALK_LIST(i, iface_list)
1181 if (i->master_index)
1182 {
1183 struct iface f = {
1184 .flags = i->flags,
1185 .mtu = i->mtu,
1186 .index = i->index,
1187 .master_index = i->master_index,
1188 .master = if_find_by_index(i->master_index)
1189 };
1190
1191 if (f.master != i->master)
1192 {
1193 memcpy(f.name, i->name, sizeof(f.name));
1194 if_update(&f);
1195 }
1196 }
1197
e818f164 1198 nl_request_dump_addr(AF_INET);
95616c82
OZ
1199 while (h = nl_get_scan())
1200 if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
3216eb03 1201 nl_parse_addr(h, 1);
95616c82
OZ
1202 else
1203 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
29a64162 1204
e818f164 1205 nl_request_dump_addr(AF_INET6);
d7661fbe
JMM
1206 while (h = nl_get_scan())
1207 if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
1208 nl_parse_addr(h, 1);
1209 else
1210 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
29a64162 1211
95616c82
OZ
1212 if_end_update();
1213}
1214
1215/*
1216 * Routes
1217 */
1218
9ddbfbdd
JMM
1219static inline u32
1220krt_table_id(struct krt_proto *p)
1221{
1222 return KRT_CF->sys.table_id;
1223}
1224
1225static HASH(struct krt_proto) nl_table_map;
1226
29a64162
OZ
1227#define RTH_KEY(p) p->af, krt_table_id(p)
1228#define RTH_NEXT(p) p->sys.hash_next
1229#define RTH_EQ(a1,i1,a2,i2) a1 == a2 && i1 == i2
1230#define RTH_FN(a,i) a ^ u32_hash(i)
9ddbfbdd
JMM
1231
1232#define RTH_REHASH rth_rehash
1233#define RTH_PARAMS /8, *2, 2, 2, 6, 20
1234
1235HASH_DEFINE_REHASH_FN(RTH, struct krt_proto)
95616c82
OZ
1236
1237int
1238krt_capable(rte *e)
1239{
1240 rta *a = e->attrs;
1241
95616c82 1242 switch (a->dest)
62e64905 1243 {
4e276a89 1244 case RTD_UNICAST:
95616c82
OZ
1245 case RTD_BLACKHOLE:
1246 case RTD_UNREACHABLE:
1247 case RTD_PROHIBIT:
62e64905
OZ
1248 return 1;
1249
95616c82
OZ
1250 default:
1251 return 0;
62e64905 1252 }
95616c82
OZ
1253}
1254
1255static inline int
4e276a89 1256nh_bufsize(struct nexthop *nh)
95616c82
OZ
1257{
1258 int rv = 0;
1259 for (; nh != NULL; nh = nh->next)
9fdf9d29 1260 rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr)));
95616c82
OZ
1261 return rv;
1262}
1263
1264static int
722daa95 1265nl_send_route(struct krt_proto *p, rte *e, int op)
95616c82
OZ
1266{
1267 eattr *ea;
1268 net *net = e->net;
1269 rta *a = e->attrs;
13c0be19 1270 ea_list *eattrs = a->eattrs;
4e276a89 1271 int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(&(a->nh));
4adcb9df 1272 u32 priority = 0;
a8caff32 1273
95616c82
OZ
1274 struct {
1275 struct nlmsghdr h;
1276 struct rtmsg r;
a8caff32
JMM
1277 char buf[0];
1278 } *r;
1279
1280 int rsize = sizeof(*r) + bufsize;
1281 r = alloca(rsize);
95616c82 1282
cc5b93f7 1283 DBG("nl_send_route(%N,op=%x)\n", net->n.addr, op);
95616c82 1284
a8caff32
JMM
1285 bzero(&r->h, sizeof(r->h));
1286 bzero(&r->r, sizeof(r->r));
cc5b93f7 1287 r->h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE;
a8caff32 1288 r->h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
cc5b93f7 1289 r->h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK;
95616c82 1290
a8caff32
JMM
1291 r->r.rtm_family = p->af;
1292 r->r.rtm_dst_len = net_pxlen(net->n.addr);
1293 r->r.rtm_protocol = RTPROT_BIRD;
7074be22 1294 r->r.rtm_scope = RT_SCOPE_NOWHERE;
6b0f5f68 1295#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
1296 if (p->af == AF_MPLS)
1297 {
66acbc8d
OZ
1298 /*
1299 * Kernel MPLS code is a bit picky. We must:
1300 * 1) Always set RT_SCOPE_UNIVERSE and RTN_UNICAST (even for RTM_DELROUTE)
1301 * 2) Never use RTA_PRIORITY
1302 */
1303
d14f8c3c
JMM
1304 u32 label = net_mpls(net->n.addr);
1305 nl_add_attr_mpls(&r->h, rsize, RTA_DST, 1, &label);
66acbc8d
OZ
1306 r->r.rtm_scope = RT_SCOPE_UNIVERSE;
1307 r->r.rtm_type = RTN_UNICAST;
d14f8c3c
JMM
1308 }
1309 else
6b0f5f68 1310#endif
be17805c 1311 {
d14f8c3c 1312 nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(net->n.addr));
95616c82 1313
be17805c
OZ
1314 /* Add source address for IPv6 SADR routes */
1315 if (net->n.addr->type == NET_IP6_SADR)
1316 {
1317 net_addr_ip6_sadr *a = (void *) &net->n.addr;
1318 nl_add_attr_ip6(&r->h, rsize, RTA_SRC, a->src_prefix);
1319 r->r.rtm_src_len = a->src_pxlen;
1320 }
1321 }
1322
2feaa693
OZ
1323 /*
1324 * Strange behavior for RTM_DELROUTE:
1325 * 1) rtm_family is ignored in IPv6, works for IPv4
1326 * 2) not setting RTA_PRIORITY is different from setting default value (on IPv6)
1327 * 3) not setting RTA_PRIORITY is equivalent to setting 0, which is wildcard
1328 */
1329
9ddbfbdd 1330 if (krt_table_id(p) < 256)
a8caff32 1331 r->r.rtm_table = krt_table_id(p);
9ddbfbdd 1332 else
a8caff32 1333 nl_add_attr_u32(&r->h, rsize, RTA_TABLE, krt_table_id(p));
9ddbfbdd 1334
66acbc8d
OZ
1335 if (p->af == AF_MPLS)
1336 priority = 0;
4adcb9df
OZ
1337 else if (KRT_CF->sys.metric)
1338 priority = KRT_CF->sys.metric;
1339 else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC)))
1340 priority = ea->u.data;
78a2cc28 1341
4adcb9df 1342 if (priority)
d1b8fe93 1343 nl_add_attr_u32(&r->h, rsize, RTA_PRIORITY, priority);
78a2cc28 1344
2feaa693
OZ
1345 /* For route delete, we do not specify remaining route attributes */
1346 if (op == NL_OP_DELETE)
722daa95 1347 goto done;
78a2cc28 1348
6e75d0d2 1349 /* Default scope is LINK for device routes, UNIVERSE otherwise */
66acbc8d
OZ
1350 if (p->af == AF_MPLS)
1351 r->r.rtm_scope = RT_SCOPE_UNIVERSE;
1352 else if (ea = ea_find(eattrs, EA_KRT_SCOPE))
cc5b93f7 1353 r->r.rtm_scope = ea->u.data;
722daa95
OZ
1354 else if (a->dest == RTD_UNICAST && ipa_zero(a->nh.gw))
1355 r->r.rtm_scope = RT_SCOPE_LINK;
6e75d0d2 1356 else
722daa95 1357 r->r.rtm_scope = RT_SCOPE_UNIVERSE;
95616c82
OZ
1358
1359 if (ea = ea_find(eattrs, EA_KRT_PREFSRC))
a8caff32 1360 nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data);
95616c82
OZ
1361
1362 if (ea = ea_find(eattrs, EA_KRT_REALM))
a8caff32 1363 nl_add_attr_u32(&r->h, rsize, RTA_FLOW, ea->u.data);
95616c82 1364
9fdf9d29
OZ
1365
1366 u32 metrics[KRT_METRICS_MAX];
1367 metrics[0] = 0;
1368
1369 struct ea_walk_state ews = { .eattrs = eattrs };
1370 while (ea = ea_walk(&ews, EA_KRT_METRICS, KRT_METRICS_MAX))
1371 {
1372 int id = ea->id - EA_KRT_METRICS;
1373 metrics[0] |= 1 << id;
1374 metrics[id] = ea->u.data;
1375 }
1376
1377 if (metrics[0])
a8caff32 1378 nl_add_metrics(&r->h, rsize, metrics, KRT_METRICS_MAX);
9fdf9d29 1379
722daa95 1380 switch (a->dest)
95616c82 1381 {
4e276a89 1382 case RTD_UNICAST:
a8caff32 1383 r->r.rtm_type = RTN_UNICAST;
722daa95
OZ
1384 struct nexthop *nh = &(a->nh);
1385 if (nh->next)
21f9acd2 1386 nl_add_multipath(&r->h, rsize, nh, p->af, eattrs);
4e276a89
JMM
1387 else
1388 {
1389 nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->iface->index);
d14f8c3c 1390 nl_add_nexthop(&r->h, rsize, nh, p->af);
a1f5e514
OZ
1391
1392 if (nh->flags & RNF_ONLINK)
1393 r->r.rtm_flags |= RTNH_F_ONLINK;
4e276a89 1394 }
95616c82
OZ
1395 break;
1396 case RTD_BLACKHOLE:
a8caff32 1397 r->r.rtm_type = RTN_BLACKHOLE;
95616c82
OZ
1398 break;
1399 case RTD_UNREACHABLE:
a8caff32 1400 r->r.rtm_type = RTN_UNREACHABLE;
95616c82
OZ
1401 break;
1402 case RTD_PROHIBIT:
a8caff32 1403 r->r.rtm_type = RTN_PROHIBIT;
95616c82 1404 break;
2feaa693
OZ
1405 case RTD_NONE:
1406 break;
95616c82
OZ
1407 default:
1408 bug("krt_capable inconsistent with nl_send_route");
1409 }
1410
722daa95 1411done:
2feaa693 1412 /* Ignore missing for DELETE */
cc5b93f7 1413 return nl_exchange(&r->h, (op == NL_OP_DELETE));
2feaa693
OZ
1414}
1415
1416static inline int
ddb1bdf2 1417nl_allow_replace(struct krt_proto *p, rte *new)
2feaa693 1418{
ddb1bdf2
OZ
1419 /*
1420 * We use NL_OP_REPLACE for IPv4, it has an issue with not checking for
1421 * matching rtm_protocol, but that is OK when dedicated priority is used.
1422 *
1423 * For IPv6, the NL_OP_REPLACE is still broken even in Linux 4.19 LTS
1424 * (although it seems to be fixed in Linux 5.10 LTS) for sequence:
1425 *
1426 * ip route add 2001:db8::/32 via fe80::1 dev eth0
1427 * ip route replace 2001:db8::/32 dev eth0
1428 *
1429 * (it ends with two routes instead of replacing the first by the second one)
1430 *
1431 * Replacing with direct and special type (e.g. unreachable) routes does not
1432 * work, but replacing with regular routes work reliably
1433 */
2feaa693 1434
ddb1bdf2
OZ
1435 if (krt_ipv4(p))
1436 return 1;
95616c82 1437
ddb1bdf2
OZ
1438 rta *a = new->attrs;
1439 return (a->dest == RTD_UNICAST) && ipa_nonzero(a->nh.gw);
8235c474
OZ
1440}
1441
95616c82 1442void
cc75b3e1 1443krt_replace_rte(struct krt_proto *p, net *n UNUSED, rte *new, rte *old)
95616c82
OZ
1444{
1445 int err = 0;
1446
ddb1bdf2 1447 if (old && new && nl_allow_replace(p, new))
8235c474 1448 {
722daa95 1449 err = nl_send_route(p, new, NL_OP_REPLACE);
8235c474
OZ
1450 }
1451 else
1452 {
1453 if (old)
722daa95 1454 nl_send_route(p, old, NL_OP_DELETE);
95616c82 1455
8235c474 1456 if (new)
722daa95 1457 err = nl_send_route(p, new, NL_OP_ADD);
8235c474 1458 }
95616c82 1459
cc75b3e1
OZ
1460 if (new)
1461 {
1462 if (err < 0)
1463 bmap_clear(&p->sync_map, new->id);
1464 else
1465 bmap_set(&p->sync_map, new->id);
1466 }
95616c82
OZ
1467}
1468
2feaa693 1469
d0dd1d20
OZ
/*
 * Helpers for nl_parse_route(): emit a debug message and return from the
 * calling function. SKIP() additionally prints the local variable `dst`,
 * so it may only be used where `dst` is in scope and already filled in;
 * SKIP0() has no such requirement.
 */
#define SKIP0(ARG, ...) \
  do { DBG("KRT: Ignoring route - " ARG, ##__VA_ARGS__); return; } while(0)
#define SKIP(ARG, ...) \
  do { DBG("KRT: Ignoring route %N - " ARG, &dst, ##__VA_ARGS__); return; } while(0)
95616c82
OZ
1472
1473static void
2feaa693 1474nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
95616c82
OZ
1475{
1476 struct krt_proto *p;
1477 struct rtmsg *i;
ad276157 1478 struct rtattr *a[BIRD_RTA_MAX];
95616c82
OZ
1479 int new = h->nlmsg_type == RTM_NEWROUTE;
1480
be17805c 1481 net_addr dst, src = {};
95616c82 1482 u32 oif = ~0;
29a64162 1483 u32 table_id;
2feaa693 1484 u32 priority = 0;
6e75d0d2 1485 u32 def_scope = RT_SCOPE_UNIVERSE;
be17805c 1486 int krt_src;
95616c82 1487
ad276157 1488 if (!(i = nl_checkin(h, sizeof(*i))))
95616c82 1489 return;
ad276157
JMM
1490
1491 switch (i->rtm_family)
95616c82 1492 {
29a64162
OZ
1493 case AF_INET:
1494 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want4, a, sizeof(a)))
1495 return;
1496
1497 if (a[RTA_DST])
1498 net_fill_ip4(&dst, rta_get_ip4(a[RTA_DST]), i->rtm_dst_len);
1499 else
1500 net_fill_ip4(&dst, IP4_NONE, 0);
1501 break;
1502
cc5b93f7
OZ
1503 case AF_INET6:
1504 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a)))
1505 return;
29a64162
OZ
1506
1507 if (a[RTA_DST])
1508 net_fill_ip6(&dst, rta_get_ip6(a[RTA_DST]), i->rtm_dst_len);
1509 else
1510 net_fill_ip6(&dst, IP6_NONE, 0);
be17805c
OZ
1511
1512 if (a[RTA_SRC])
1513 net_fill_ip6(&src, rta_get_ip6(a[RTA_SRC]), i->rtm_src_len);
1514 else
1515 net_fill_ip6(&src, IP6_NONE, 0);
29a64162
OZ
1516 break;
1517
6b0f5f68 1518#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
1519 case AF_MPLS:
1520 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want_mpls, a, sizeof(a)))
1521 return;
1522
ed610044 1523 if (!a[RTA_DST])
d0dd1d20 1524 SKIP0("MPLS route without RTA_DST\n");
ed610044
OZ
1525
1526 if (rta_get_mpls(a[RTA_DST], rta_mpls_stack) != 1)
d0dd1d20 1527 SKIP0("MPLS route with multi-label RTA_DST\n");
ed610044
OZ
1528
1529 net_fill_mpls(&dst, rta_mpls_stack[0]);
d14f8c3c 1530 break;
6b0f5f68 1531#endif
d14f8c3c 1532
29a64162
OZ
1533 default:
1534 return;
95616c82
OZ
1535 }
1536
95616c82 1537 if (a[RTA_OIF])
acb04cfd 1538 oif = rta_get_u32(a[RTA_OIF]);
95616c82 1539
9ddbfbdd 1540 if (a[RTA_TABLE])
29a64162 1541 table_id = rta_get_u32(a[RTA_TABLE]);
9ddbfbdd 1542 else
29a64162 1543 table_id = i->rtm_table;
9ddbfbdd 1544
d0dd1d20
OZ
1545 if (i->rtm_flags & RTM_F_CLONED)
1546 SKIP("cloned\n");
1547
29a64162
OZ
1548 /* Do we know this table? */
1549 p = HASH_FIND(nl_table_map, RTH, i->rtm_family, table_id);
95616c82 1550 if (!p)
4659b2ae 1551 SKIP("unknown table %u\n", table_id);
95616c82 1552
be17805c
OZ
1553 if (a[RTA_SRC] && (p->p.net_type != NET_IP6_SADR))
1554 SKIP("src prefix for non-SADR channel\n");
1555
95616c82
OZ
1556 if (a[RTA_IIF])
1557 SKIP("IIF set\n");
29a64162 1558
95616c82
OZ
1559 if (i->rtm_tos != 0) /* We don't support TOS */
1560 SKIP("TOS %02x\n", i->rtm_tos);
95616c82 1561
2feaa693 1562 if (s->scan && !new)
95616c82
OZ
1563 SKIP("RTM_DELROUTE in scan\n");
1564
2feaa693
OZ
1565 if (a[RTA_PRIORITY])
1566 priority = rta_get_u32(a[RTA_PRIORITY]);
1567
9b136840 1568 int c = net_classify(&dst);
95616c82
OZ
1569 if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
1570 SKIP("strange class/scope\n");
1571
95616c82
OZ
1572 switch (i->rtm_protocol)
1573 {
1574 case RTPROT_UNSPEC:
1575 SKIP("proto unspec\n");
1576
1577 case RTPROT_REDIRECT:
be17805c 1578 krt_src = KRT_SRC_REDIRECT;
95616c82
OZ
1579 break;
1580
1581 case RTPROT_KERNEL:
be17805c 1582 krt_src = KRT_SRC_KERNEL;
95616c82
OZ
1583 return;
1584
1585 case RTPROT_BIRD:
2feaa693 1586 if (!s->scan)
95616c82 1587 SKIP("echo\n");
be17805c 1588 krt_src = KRT_SRC_BIRD;
95616c82
OZ
1589 break;
1590
1591 case RTPROT_BOOT:
1592 default:
be17805c 1593 krt_src = KRT_SRC_ALIEN;
95616c82
OZ
1594 }
1595
be17805c
OZ
1596 net_addr *n = &dst;
1597 if (p->p.net_type == NET_IP6_SADR)
1598 {
1599 n = alloca(sizeof(net_addr_ip6_sadr));
1600 net_fill_ip6_sadr(n, net6_prefix(&dst), net6_pxlen(&dst),
1601 net6_prefix(&src), net6_pxlen(&src));
1602 }
1603
1604 net *net = net_get(p->p.main_channel->table, n);
95616c82 1605
d14f8c3c 1606 rta *ra = lp_allocz(s->pool, RTA_MAX_SIZE);
2feaa693
OZ
1607 ra->source = RTS_INHERIT;
1608 ra->scope = SCOPE_UNIVERSE;
95616c82 1609
082905a8
OZ
1610 {
1611 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + 2 * sizeof(eattr));
1612 *ea = (ea_list) { .flags = EALF_SORTED, .count = 2 };
1613 ea->next = ra->eattrs;
1614 ra->eattrs = ea;
1615
1616 ea->attrs[0] = (eattr) {
1617 .id = EA_KRT_SOURCE,
1618 .type = EAF_TYPE_INT,
1619 .u.data = i->rtm_protocol
1620 };
1621
1622 ea->attrs[1] = (eattr) {
1623 .id = EA_KRT_METRIC,
1624 .type = EAF_TYPE_INT,
1625 .u.data = priority,
1626 };
1627 }
1628
21f9acd2
OZ
1629 if (a[RTA_FLOW])
1630 s->rta_flow = rta_get_u32(a[RTA_FLOW]);
1631 else
1632 s->rta_flow = 0;
1633
95616c82
OZ
1634 switch (i->rtm_type)
1635 {
1636 case RTN_UNICAST:
62e64905 1637 ra->dest = RTD_UNICAST;
95616c82 1638
98bb80a2 1639 if (a[RTA_MULTIPATH])
4ff15a75 1640 {
f5c8fb5f 1641 struct nexthop *nh = nl_parse_multipath(s, p, n, a[RTA_MULTIPATH], i->rtm_family, krt_src);
4e276a89 1642 if (!nh)
77d032c7 1643 SKIP("strange RTA_MULTIPATH\n");
9fdf9d29 1644
2eaf65ec 1645 nexthop_link(ra, nh);
95616c82
OZ
1646 break;
1647 }
1648
f5c8fb5f 1649 if ((i->rtm_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD))
77d032c7 1650 SKIP("ignore RTNH_F_DEAD\n");
df83f626 1651
4e276a89
JMM
1652 ra->nh.iface = if_find_by_index(oif);
1653 if (!ra->nh.iface)
95616c82 1654 {
fe9f1a6d 1655 log(L_ERR "KRT: Received route %N with unknown ifindex %u", net->n.addr, oif);
95616c82
OZ
1656 return;
1657 }
1658
53401bef
OZ
1659 if (a[RTA_GATEWAY])
1660 ra->nh.gw = rta_get_ipa(a[RTA_GATEWAY]);
1661
6b0f5f68 1662#ifdef HAVE_MPLS_KERNEL
53401bef
OZ
1663 if (a[RTA_VIA])
1664 ra->nh.gw = rta_get_via(a[RTA_VIA]);
6b0f5f68 1665#endif
95616c82 1666
32425297
OZ
1667 if (i->rtm_flags & RTNH_F_ONLINK)
1668 ra->nh.flags |= RNF_ONLINK;
1669
53401bef
OZ
1670 if (ipa_nonzero(ra->nh.gw))
1671 {
95616c82 1672 /* Silently skip strange 6to4 routes */
0bf95f99 1673 const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96);
4e276a89 1674 if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->nh.gw, (net_addr *) &sit))
95616c82
OZ
1675 return;
1676
23c212e7 1677 neighbor *nbr;
586c1800
OZ
1678 nbr = neigh_find(&p->p, ra->nh.gw, ra->nh.iface,
1679 (ra->nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0);
23c212e7 1680 if (!nbr || (nbr->scope == SCOPE_HOST))
95616c82 1681 {
4e276a89
JMM
1682 log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr,
1683 ra->nh.gw);
95616c82
OZ
1684 return;
1685 }
1686 }
95616c82
OZ
1687
1688 break;
1689 case RTN_BLACKHOLE:
2feaa693 1690 ra->dest = RTD_BLACKHOLE;
95616c82
OZ
1691 break;
1692 case RTN_UNREACHABLE:
2feaa693 1693 ra->dest = RTD_UNREACHABLE;
95616c82
OZ
1694 break;
1695 case RTN_PROHIBIT:
2feaa693 1696 ra->dest = RTD_PROHIBIT;
95616c82
OZ
1697 break;
1698 /* FIXME: What about RTN_THROW? */
1699 default:
1700 SKIP("type %d\n", i->rtm_type);
1701 return;
1702 }
1703
6b0f5f68 1704#ifdef HAVE_MPLS_KERNEL
d14f8c3c 1705 if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !ra->nh.next)
2eaf65ec 1706 ra->nh.labels = rta_get_mpls(a[RTA_NEWDST], ra->nh.label);
d14f8c3c
JMM
1707
1708 if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !ra->nh.next)
1709 {
1710 switch (rta_get_u16(a[RTA_ENCAP_TYPE]))
1711 {
1712 case LWTUNNEL_ENCAP_MPLS:
1713 {
1714 struct rtattr *enca[BIRD_RTA_MAX];
1715 nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
1716 nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
2eaf65ec 1717 ra->nh.labels = rta_get_mpls(enca[RTA_DST], ra->nh.label);
d14f8c3c
JMM
1718 break;
1719 }
1720 default:
1721 SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE]));
1722 break;
1723 }
1724 }
6b0f5f68 1725#endif
d14f8c3c 1726
6e75d0d2
OZ
1727 if (i->rtm_scope != def_scope)
1728 {
1729 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1730 ea->next = ra->eattrs;
1731 ra->eattrs = ea;
1732 ea->flags = EALF_SORTED;
1733 ea->count = 1;
16ac6c3c
MM
1734 ea->attrs[0] = (eattr) {
1735 .id = EA_KRT_SCOPE,
1736 .flags = 0,
1737 .type = EAF_TYPE_INT,
1738 .u.data = i->rtm_scope,
1739 };
6e75d0d2 1740 }
95616c82
OZ
1741
1742 if (a[RTA_PREFSRC])
1743 {
9b136840 1744 ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]);
95616c82 1745
16ac6c3c
MM
1746 struct adata *ad = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps));
1747 ad->length = sizeof(ps);
1748 memcpy(ad->data, &ps, sizeof(ps));
1749
2feaa693
OZ
1750 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1751 ea->next = ra->eattrs;
1752 ra->eattrs = ea;
95616c82
OZ
1753 ea->flags = EALF_SORTED;
1754 ea->count = 1;
16ac6c3c
MM
1755 ea->attrs[0] = (eattr) {
1756 .id = EA_KRT_PREFSRC,
1757 .flags = 0,
1758 .type = EAF_TYPE_IP_ADDRESS,
1759 .u.ptr = ad,
1760 };
95616c82
OZ
1761 }
1762
21f9acd2
OZ
1763 /* Can be set per-route or per-nexthop */
1764 if (s->rta_flow)
95616c82 1765 {
2feaa693
OZ
1766 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1767 ea->next = ra->eattrs;
1768 ra->eattrs = ea;
95616c82
OZ
1769 ea->flags = EALF_SORTED;
1770 ea->count = 1;
16ac6c3c
MM
1771 ea->attrs[0] = (eattr) {
1772 .id = EA_KRT_REALM,
1773 .flags = 0,
1774 .type = EAF_TYPE_INT,
1775 .u.data = s->rta_flow,
1776 };
95616c82
OZ
1777 }
1778
9fdf9d29
OZ
1779 if (a[RTA_METRICS])
1780 {
1781 u32 metrics[KRT_METRICS_MAX];
2feaa693 1782 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr));
9fdf9d29
OZ
1783 int t, n = 0;
1784
1785 if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0)
1786 {
fe9f1a6d 1787 log(L_ERR "KRT: Received route %N with strange RTA_METRICS attribute", net->n.addr);
9fdf9d29
OZ
1788 return;
1789 }
1790
1791 for (t = 1; t < KRT_METRICS_MAX; t++)
1792 if (metrics[0] & (1 << t))
16ac6c3c
MM
1793 ea->attrs[n++] = (eattr) {
1794 .id = EA_CODE(PROTOCOL_KERNEL, KRT_METRICS_OFFSET + t),
1795 .flags = 0,
1796 .type = EAF_TYPE_INT, /* FIXME: Some are EAF_TYPE_BITFIELD */
1797 .u.data = metrics[t],
1798 };
9fdf9d29
OZ
1799
1800 if (n > 0)
1801 {
2feaa693 1802 ea->next = ra->eattrs;
9fdf9d29
OZ
1803 ea->flags = EALF_SORTED;
1804 ea->count = n;
2feaa693 1805 ra->eattrs = ea;
9fdf9d29
OZ
1806 }
1807 }
1808
082905a8 1809 rte *e = rte_get_temp(ra, p->p.main_source);
722daa95 1810 e->net = net;
2feaa693 1811
722daa95 1812 if (s->scan)
082905a8 1813 krt_got_route(p, e, krt_src);
95616c82 1814 else
082905a8 1815 krt_got_route_async(p, e, new, krt_src);
2feaa693 1816
722daa95 1817 lp_flush(s->pool);
95616c82
OZ
1818}
1819
1820void
534d0a4b 1821krt_do_scan(struct krt_proto *p)
95616c82 1822{
722daa95
OZ
1823 struct nl_parse_state s = {
1824 .proto = p,
1825 .pool = nl_linpool,
1826 .scan = 1,
1827 };
534d0a4b
OZ
1828
1829 /* Table-specific scan or shared scan */
1830 if (p)
1831 nl_request_dump_route(p->af, krt_table_id(p));
1832 else
1833 nl_request_dump_route(AF_UNSPEC, 0);
95616c82 1834
722daa95 1835 struct nlmsghdr *h;
95616c82 1836 while (h = nl_get_scan())
534d0a4b 1837 {
95616c82 1838 if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
2feaa693 1839 nl_parse_route(&s, h);
95616c82
OZ
1840 else
1841 log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
534d0a4b 1842 }
95616c82
OZ
1843}
1844
1845/*
1846 * Asynchronous Netlink interface
1847 */
1848
1849static sock *nl_async_sk; /* BIRD socket for asynchronous notifications */
1850static byte *nl_async_rx_buffer; /* Receive buffer */
81ee6cda
OZ
1851static uint nl_async_bufsize; /* Kernel rx buffer size for the netlink socket */
1852static struct config *nl_last_config; /* For tracking changes to nl_async_bufsize */
95616c82
OZ
1853
1854static void
1855nl_async_msg(struct nlmsghdr *h)
1856{
722daa95
OZ
1857 struct nl_parse_state s = {
1858 .proto = NULL,
1859 .pool = nl_linpool,
1860 .scan = 0,
1861 };
2feaa693 1862
95616c82
OZ
1863 switch (h->nlmsg_type)
1864 {
1865 case RTM_NEWROUTE:
1866 case RTM_DELROUTE:
1867 DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);
2feaa693 1868 nl_parse_route(&s, h);
95616c82
OZ
1869 break;
1870 case RTM_NEWLINK:
1871 case RTM_DELLINK:
1872 DBG("KRT: Received async link notification (%d)\n", h->nlmsg_type);
1e4891e4
OZ
1873 if (kif_proto)
1874 nl_parse_link(h, 0);
95616c82
OZ
1875 break;
1876 case RTM_NEWADDR:
1877 case RTM_DELADDR:
1878 DBG("KRT: Received async address notification (%d)\n", h->nlmsg_type);
1e4891e4
OZ
1879 if (kif_proto)
1880 nl_parse_addr(h, 0);
95616c82
OZ
1881 break;
1882 default:
1883 DBG("KRT: Received unknown async notification (%d)\n", h->nlmsg_type);
1884 }
1885}
1886
1887static int
3e236955 1888nl_async_hook(sock *sk, uint size UNUSED)
95616c82
OZ
1889{
1890 struct iovec iov = { nl_async_rx_buffer, NL_RX_SIZE };
1891 struct sockaddr_nl sa;
31e9e101
ST
1892 struct msghdr m = {
1893 .msg_name = &sa,
1894 .msg_namelen = sizeof(sa),
1895 .msg_iov = &iov,
1896 .msg_iovlen = 1,
1897 };
95616c82
OZ
1898 struct nlmsghdr *h;
1899 int x;
ae80a2de 1900 uint len;
95616c82
OZ
1901
1902 x = recvmsg(sk->fd, &m, 0);
1903 if (x < 0)
1904 {
1905 if (errno == ENOBUFS)
1906 {
1907 /*
1908 * Netlink reports some packets have been thrown away.
1909 * One day we might react to it by asking for route table
1910 * scan in near future.
1911 */
2c33da50 1912 log(L_WARN "Kernel dropped some netlink messages, will resync on next scan.");
95616c82
OZ
1913 return 1; /* More data are likely to be ready */
1914 }
1915 else if (errno != EWOULDBLOCK)
1916 log(L_ERR "Netlink recvmsg: %m");
1917 return 0;
1918 }
1919 if (sa.nl_pid) /* It isn't from the kernel */
1920 {
1921 DBG("Non-kernel packet\n");
1922 return 1;
1923 }
1924 h = (void *) nl_async_rx_buffer;
1925 len = x;
1926 if (m.msg_flags & MSG_TRUNC)
1927 {
1928 log(L_WARN "Netlink got truncated asynchronous message");
1929 return 1;
1930 }
1931 while (NLMSG_OK(h, len))
1932 {
1933 nl_async_msg(h);
1934 h = NLMSG_NEXT(h, len);
1935 }
1936 if (len)
1937 log(L_WARN "nl_async_hook: Found packet remnant of size %d", len);
1938 return 1;
1939}
1940
ccd2a3ed
JMM
1941static void
1942nl_async_err_hook(sock *sk, int e UNUSED)
1943{
1944 nl_async_hook(sk, 0);
1945}
1946
95616c82
OZ
1947static void
1948nl_open_async(void)
1949{
1950 sock *sk;
1951 struct sockaddr_nl sa;
1952 int fd;
95616c82 1953
f83ce94d 1954 if (nl_async_sk)
95616c82 1955 return;
95616c82
OZ
1956
1957 DBG("KRT: Opening async netlink socket\n");
1958
1959 fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
1960 if (fd < 0)
1961 {
1962 log(L_ERR "Unable to open asynchronous rtnetlink socket: %m");
1963 return;
1964 }
1965
1966 bzero(&sa, sizeof(sa));
1967 sa.nl_family = AF_NETLINK;
29a64162
OZ
1968 sa.nl_groups = RTMGRP_LINK |
1969 RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE |
1970 RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE;
1971
95616c82
OZ
1972 if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
1973 {
1974 log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m");
f83ce94d 1975 close(fd);
95616c82
OZ
1976 return;
1977 }
1978
f83ce94d
OZ
1979 nl_async_rx_buffer = xmalloc(NL_RX_SIZE);
1980
95616c82
OZ
1981 sk = nl_async_sk = sk_new(krt_pool);
1982 sk->type = SK_MAGIC;
1983 sk->rx_hook = nl_async_hook;
ccd2a3ed 1984 sk->err_hook = nl_async_err_hook;
95616c82 1985 sk->fd = fd;
05476c4d 1986 if (sk_open(sk) < 0)
95616c82 1987 bug("Netlink: sk_open failed");
95616c82
OZ
1988}
1989
81ee6cda
OZ
1990static void
1991nl_update_async_bufsize(void)
1992{
1993 /* No async socket */
1994 if (!nl_async_sk)
1995 return;
1996
1997 /* Already reconfigured */
1998 if (nl_last_config == config)
1999 return;
2000
2001 /* Update netlink buffer size */
2002 uint bufsize = nl_cfg_rx_buffer_size(config);
2003 if (bufsize && (bufsize != nl_async_bufsize))
2004 {
2005 /* Log message for reconfigurations only */
2006 if (nl_last_config)
2007 log(L_INFO "KRT: Changing netlink rx buffer size to %u", bufsize);
2008
2009 nl_set_rcvbuf(nl_async_sk->fd, bufsize);
2010 nl_async_bufsize = bufsize;
2011 }
2012
2013 nl_last_config = config;
2014}
2015
9ddbfbdd 2016
95616c82
OZ
2017/*
2018 * Interface to the UNIX krt module
2019 */
2020
95616c82 2021void
9ddbfbdd
JMM
2022krt_sys_io_init(void)
2023{
05d47bd5 2024 nl_linpool = lp_new_default(krt_pool);
9ddbfbdd
JMM
2025 HASH_INIT(nl_table_map, krt_pool, 6);
2026}
2027
2028int
c6964c30 2029krt_sys_start(struct krt_proto *p)
95616c82 2030{
29a64162 2031 struct krt_proto *old = HASH_FIND(nl_table_map, RTH, p->af, krt_table_id(p));
9ddbfbdd
JMM
2032
2033 if (old)
2034 {
2035 log(L_ERR "%s: Kernel table %u already registered by %s",
2036 p->p.name, krt_table_id(p), old->p.name);
2037 return 0;
2038 }
2039
2040 HASH_INSERT2(nl_table_map, RTH, krt_pool, p);
c6964c30
OZ
2041
2042 nl_open();
2043 nl_open_async();
81ee6cda 2044 nl_update_async_bufsize();
9ddbfbdd
JMM
2045
2046 return 1;
95616c82
OZ
2047}
2048
2049void
9ddbfbdd 2050krt_sys_shutdown(struct krt_proto *p)
95616c82 2051{
81ee6cda
OZ
2052 nl_update_async_bufsize();
2053
9ddbfbdd 2054 HASH_REMOVE2(nl_table_map, RTH, krt_pool, p);
95616c82
OZ
2055}
2056
2057int
2058krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
2059{
81ee6cda
OZ
2060 nl_update_async_bufsize();
2061
4adcb9df 2062 return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric);
95616c82
OZ
2063}
2064
95616c82
OZ
2065void
2066krt_sys_init_config(struct krt_config *cf)
2067{
2068 cf->sys.table_id = RT_TABLE_MAIN;
bff21441 2069 cf->sys.metric = 32;
95616c82
OZ
2070}
2071
2072void
2073krt_sys_copy_config(struct krt_config *d, struct krt_config *s)
2074{
2075 d->sys.table_id = s->sys.table_id;
4adcb9df 2076 d->sys.metric = s->sys.metric;
95616c82
OZ
2077}
2078
9fdf9d29
OZ
2079static const char *krt_metrics_names[KRT_METRICS_MAX] = {
2080 NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss",
2081 "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack"
2082};
2083
2084static const char *krt_features_names[KRT_FEATURES_MAX] = {
2085 "ecn", NULL, NULL, "allfrag"
2086};
2087
2088int
258be565 2089krt_sys_get_attr(const eattr *a, byte *buf, int buflen UNUSED)
9fdf9d29
OZ
2090{
2091 switch (a->id)
2092 {
2093 case EA_KRT_PREFSRC:
2094 bsprintf(buf, "prefsrc");
2095 return GA_NAME;
2096
2097 case EA_KRT_REALM:
2098 bsprintf(buf, "realm");
2099 return GA_NAME;
2100
6e75d0d2
OZ
2101 case EA_KRT_SCOPE:
2102 bsprintf(buf, "scope");
2103 return GA_NAME;
2104
9fdf9d29
OZ
2105 case EA_KRT_LOCK:
2106 buf += bsprintf(buf, "lock:");
2107 ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX);
2108 return GA_FULL;
2109
2110 case EA_KRT_FEATURES:
2111 buf += bsprintf(buf, "features:");
2112 ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX);
2113 return GA_FULL;
2114
2115 default:;
2116 int id = (int)EA_ID(a->id) - KRT_METRICS_OFFSET;
2117 if (id > 0 && id < KRT_METRICS_MAX)
2118 {
2119 bsprintf(buf, "%s", krt_metrics_names[id]);
2120 return GA_NAME;
2121 }
2122
2123 return GA_UNKNOWN;
2124 }
2125}
2126
95616c82
OZ
2127
2128
2129void
2130kif_sys_start(struct kif_proto *p UNUSED)
2131{
2132 nl_open();
2133 nl_open_async();
2134}
2135
2136void
2137kif_sys_shutdown(struct kif_proto *p UNUSED)
2138{
2139}
153f02da
OZ
2140
2141int
2142kif_update_sysdep_addr(struct iface *i UNUSED)
2143{
2144 return 0;
2145}