1 /*
2 * BIRD -- Linux Netlink Interface
3 *
4 * (c) 1999--2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
9 #include <alloca.h>
10 #include <stdio.h>
11 #include <unistd.h>
12 #include <fcntl.h>
13 #include <sys/socket.h>
14 #include <sys/uio.h>
15 #include <errno.h>
16
17 #undef LOCAL_DEBUG
18
19 #include "nest/bird.h"
20 #include "nest/route.h"
21 #include "nest/protocol.h"
22 #include "nest/iface.h"
23 #include "lib/alloca.h"
24 #include "sysdep/unix/unix.h"
25 #include "sysdep/unix/krt.h"
26 #include "lib/socket.h"
27 #include "lib/string.h"
28 #include "lib/hash.h"
29 #include "conf/conf.h"
30
31 #include <asm/types.h>
32 #include <linux/if.h>
33 #include <linux/netlink.h>
34 #include <linux/rtnetlink.h>
35
36 #ifdef HAVE_MPLS_KERNEL
37 #include <linux/lwtunnel.h>
38 #endif
39
40 #ifndef MSG_TRUNC /* Hack: Several versions of glibc miss this one :( */
41 #define MSG_TRUNC 0x20
42 #endif
43
44 #ifndef IFA_FLAGS
45 #define IFA_FLAGS 8
46 #endif
47
48 #ifndef IFF_LOWER_UP
49 #define IFF_LOWER_UP 0x10000
50 #endif
51
52 #ifndef RTA_TABLE
53 #define RTA_TABLE 15
54 #endif
55
56 #ifndef RTA_VIA
57 #define RTA_VIA 18
58 #endif
59
60 #ifndef RTA_NEWDST
61 #define RTA_NEWDST 19
62 #endif
63
64 #ifndef RTA_ENCAP_TYPE
65 #define RTA_ENCAP_TYPE 21
66 #endif
67
68 #ifndef RTA_ENCAP
69 #define RTA_ENCAP 22
70 #endif
71
72 #define krt_ipv4(p) ((p)->af == AF_INET)
73 #define krt_ecmp6(p) ((p)->af == AF_INET6)
74
75 const int rt_default_ecmp = 16;
76
77 /*
78 * Structure nl_parse_state keeps state of received route processing. Ideally,
79 * we could just independently parse received Netlink messages and immediately
80 * propagate received routes to the rest of BIRD, but older Linux kernels (before
81 * version 4.11) represent and announce IPv6 ECMP routes not as one route with
82 * multiple next hops (like RTA_MULTIPATH in IPv4 ECMP), but as a sequence of
83 * routes with the same prefix. More recent kernels handle IPv6 the same way as IPv4.
84 *
85 * Therefore, BIRD keeps currently processed route in nl_parse_state structure
86 * and postpones its propagation until we expect it to be final; i.e., when
87 * non-matching route is received or when the scan ends. When another matching
88 * route is received, it is merged with the already processed route to form an
89 * ECMP route. Note that merging is done only for IPv6 (merge == 1), but the
90 * postponing is done in both cases (for simplicity). All IPv4 routes or IPv6
91 * routes with RTA_MULTIPATH set are just considered non-matching.
92 *
93 * This is ignored for asynchronous notifications (every notification is handled
94 * as a separate route). It is not an issue for our routes, as we ignore such
95 * notifications anyway. But importing alien IPv6 ECMP routes does not work
96 * properly with older kernels.
97 *
98 * Whatever the kernel version is, BIRD itself sends IPv6 ECMP routes to the kernel
99 * as multiple routes for the same prefix (see nl_add_rte()).
100 */
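/*
 * A rough sketch of how the postpone-and-merge logic below consumes a
 * pre-4.11 kernel dump announcing one IPv6 ECMP route as separate messages
 * (prefixes and gateways are made-up examples; the control flow is taken
 * from nl_parse_route(), nl_mergable_route() and nl_announce_route()):
 *
 *   RTM_NEWROUTE 2001:db8::/32 via fe80::1  ->  stored in s->net / s->attrs
 *   RTM_NEWROUTE 2001:db8::/32 via fe80::2  ->  nl_mergable_route() matches,
 *                                               next hop merged via nexthop_insert()
 *   RTM_NEWROUTE 2001:db8:1::/48 ...        ->  non-matching, so the merged ECMP
 *                                               route is flushed first through
 *                                               nl_announce_route()
 */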
101
102 struct nl_parse_state
103 {
104 struct linpool *pool;
105 int scan;
106 int merge;
107
108 net *net;
109 rta *attrs;
110 struct krt_proto *proto;
111 s8 new;
112 s8 krt_src;
113 u8 krt_type;
114 u8 krt_proto;
115 u32 krt_metric;
116 };
117
118 /*
119 * Synchronous Netlink interface
120 */
121
122 struct nl_sock
123 {
124 int fd;
125 u32 seq;
126 byte *rx_buffer; /* Receive buffer */
127 struct nlmsghdr *last_hdr; /* Recently received packet */
128 uint last_size;
129 };
130
131 #define NL_RX_SIZE 8192
132
133 #define NL_OP_DELETE 0
134 #define NL_OP_ADD (NLM_F_CREATE|NLM_F_EXCL)
135 #define NL_OP_REPLACE (NLM_F_CREATE|NLM_F_REPLACE)
136 #define NL_OP_APPEND (NLM_F_CREATE|NLM_F_APPEND)
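/*
 * These are the standard rtnetlink modifier flags: NLM_F_EXCL makes an add
 * fail if the route already exists, NLM_F_REPLACE overwrites an existing
 * route, and NLM_F_APPEND adds the route after existing ones with the same
 * key (used by nl_add_rte() for additional IPv6 ECMP next hops).
 * RTM_DELROUTE needs no modifier flags, hence NL_OP_DELETE is 0.
 */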
137
138 static linpool *nl_linpool;
139
140 static struct nl_sock nl_scan = {.fd = -1}; /* Netlink socket for synchronous scan */
141 static struct nl_sock nl_req = {.fd = -1}; /* Netlink socket for requests */
142
143 static void
144 nl_open_sock(struct nl_sock *nl)
145 {
146 if (nl->fd < 0)
147 {
148 nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
149 if (nl->fd < 0)
150 die("Unable to open rtnetlink socket: %m");
151 nl->seq = (u32) (current_time() TO_S); /* Or perhaps random_u32() ? */
152 nl->rx_buffer = xmalloc(NL_RX_SIZE);
153 nl->last_hdr = NULL;
154 nl->last_size = 0;
155 }
156 }
157
158 static void
159 nl_open(void)
160 {
161 nl_open_sock(&nl_scan);
162 nl_open_sock(&nl_req);
163 }
164
165 static void
166 nl_send(struct nl_sock *nl, struct nlmsghdr *nh)
167 {
168 struct sockaddr_nl sa;
169
170 memset(&sa, 0, sizeof(sa));
171 sa.nl_family = AF_NETLINK;
172 nh->nlmsg_pid = 0;
173 nh->nlmsg_seq = ++(nl->seq);
174 nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len);
175 if (sendto(nl->fd, nh, nh->nlmsg_len, 0, (struct sockaddr *)&sa, sizeof(sa)) < 0)
176 die("rtnetlink sendto: %m");
177 nl->last_hdr = NULL;
178 }
179
180 static void
181 nl_request_dump(int af, int cmd)
182 {
183 struct {
184 struct nlmsghdr nh;
185 struct rtgenmsg g;
186 } req = {
187 .nh.nlmsg_type = cmd,
188 .nh.nlmsg_len = sizeof(req),
189 .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
190 .g.rtgen_family = af
191 };
192 nl_send(&nl_scan, &req.nh);
193 }
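/*
 * A dump request is always followed by draining the replies from the same
 * socket, as in kif_do_scan() and krt_do_scan() below:
 *
 *   nl_request_dump(AF_INET, RTM_GETADDR);
 *   while (h = nl_get_scan())
 *     nl_parse_addr(h, 1);
 */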
194
195 static struct nlmsghdr *
196 nl_get_reply(struct nl_sock *nl)
197 {
198 for(;;)
199 {
200 if (!nl->last_hdr)
201 {
202 struct iovec iov = { nl->rx_buffer, NL_RX_SIZE };
203 struct sockaddr_nl sa;
204 struct msghdr m = {
205 .msg_name = &sa,
206 .msg_namelen = sizeof(sa),
207 .msg_iov = &iov,
208 .msg_iovlen = 1,
209 };
210 int x = recvmsg(nl->fd, &m, 0);
211 if (x < 0)
212 die("nl_get_reply: %m");
213 if (sa.nl_pid) /* It isn't from the kernel */
214 {
215 DBG("Non-kernel packet\n");
216 continue;
217 }
218 nl->last_size = x;
219 nl->last_hdr = (void *) nl->rx_buffer;
220 if (m.msg_flags & MSG_TRUNC)
221 bug("nl_get_reply: got truncated reply which should be impossible");
222 }
223 if (NLMSG_OK(nl->last_hdr, nl->last_size))
224 {
225 struct nlmsghdr *h = nl->last_hdr;
226 nl->last_hdr = NLMSG_NEXT(h, nl->last_size);
227 if (h->nlmsg_seq != nl->seq)
228 {
229 log(L_WARN "nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)",
230 h->nlmsg_seq, nl->seq);
231 continue;
232 }
233 return h;
234 }
235 if (nl->last_size)
236 log(L_WARN "nl_get_reply: Found packet remnant of size %d", nl->last_size);
237 nl->last_hdr = NULL;
238 }
239 }
240
241 static struct tbf rl_netlink_err = TBF_DEFAULT_LOG_LIMITS;
242
243 static int
244 nl_error(struct nlmsghdr *h, int ignore_esrch)
245 {
246 struct nlmsgerr *e;
247 int ec;
248
249 if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr)))
250 {
251 log(L_WARN "Netlink: Truncated error message received");
252 return ENOBUFS;
253 }
254 e = (struct nlmsgerr *) NLMSG_DATA(h);
255 ec = -e->error;
256 if (ec && !(ignore_esrch && (ec == ESRCH)))
257 log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec));
258 return ec;
259 }
260
261 static struct nlmsghdr *
262 nl_get_scan(void)
263 {
264 struct nlmsghdr *h = nl_get_reply(&nl_scan);
265
266 if (h->nlmsg_type == NLMSG_DONE)
267 return NULL;
268 if (h->nlmsg_type == NLMSG_ERROR)
269 {
270 nl_error(h, 0);
271 return NULL;
272 }
273 return h;
274 }
275
276 static int
277 nl_exchange(struct nlmsghdr *pkt, int ignore_esrch)
278 {
279 struct nlmsghdr *h;
280
281 nl_send(&nl_req, pkt);
282 for(;;)
283 {
284 h = nl_get_reply(&nl_req);
285 if (h->nlmsg_type == NLMSG_ERROR)
286 break;
287 log(L_WARN "nl_exchange: Unexpected reply received");
288 }
289 return nl_error(h, ignore_esrch) ? -1 : 0;
290 }
291
292 /*
293 * Netlink attributes
294 */
295
296 static int nl_attr_len;
297
298 static void *
299 nl_checkin(struct nlmsghdr *h, int lsize)
300 {
301 nl_attr_len = h->nlmsg_len - NLMSG_LENGTH(lsize);
302 if (nl_attr_len < 0)
303 {
304 log(L_ERR "nl_checkin: underrun by %d bytes", -nl_attr_len);
305 return NULL;
306 }
307 return NLMSG_DATA(h);
308 }
309
310 struct nl_want_attrs {
311 u8 defined:1;
312 u8 checksize:1;
313 u8 size;
314 };
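/*
 * Tables of attributes we are interested in. nl_parse_attrs() ignores any
 * attribute whose entry has .defined == 0 and rejects the whole message when
 * .checksize is set and the payload length differs from .size.
 */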
315
316
317 #define BIRD_IFLA_MAX (IFLA_WIRELESS+1)
318
319 static struct nl_want_attrs ifla_attr_want[BIRD_IFLA_MAX] = {
320 [IFLA_IFNAME] = { 1, 0, 0 },
321 [IFLA_MTU] = { 1, 1, sizeof(u32) },
322 [IFLA_MASTER] = { 1, 1, sizeof(u32) },
323 [IFLA_WIRELESS] = { 1, 0, 0 },
324 };
325
326
327 #define BIRD_IFA_MAX (IFA_FLAGS+1)
328
329 static struct nl_want_attrs ifa_attr_want4[BIRD_IFA_MAX] = {
330 [IFA_ADDRESS] = { 1, 1, sizeof(ip4_addr) },
331 [IFA_LOCAL] = { 1, 1, sizeof(ip4_addr) },
332 [IFA_BROADCAST] = { 1, 1, sizeof(ip4_addr) },
333 [IFA_FLAGS] = { 1, 1, sizeof(u32) },
334 };
335
336 static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = {
337 [IFA_ADDRESS] = { 1, 1, sizeof(ip6_addr) },
338 [IFA_LOCAL] = { 1, 1, sizeof(ip6_addr) },
339 [IFA_FLAGS] = { 1, 1, sizeof(u32) },
340 };
341
342
343 #define BIRD_RTA_MAX (RTA_ENCAP+1)
344
345 static struct nl_want_attrs nexthop_attr_want4[BIRD_RTA_MAX] = {
346 [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
347 [RTA_VIA] = { 1, 0, 0 },
348 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
349 [RTA_ENCAP] = { 1, 0, 0 },
350 };
351
352 static struct nl_want_attrs nexthop_attr_want6[BIRD_RTA_MAX] = {
353 [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) },
354 [RTA_VIA] = { 1, 0, 0 },
355 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
356 [RTA_ENCAP] = { 1, 0, 0 },
357 };
358
359 #ifdef HAVE_MPLS_KERNEL
360 static struct nl_want_attrs encap_mpls_want[BIRD_RTA_MAX] = {
361 [RTA_DST] = { 1, 0, 0 },
362 };
363 #endif
364
365 static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = {
366 [RTA_DST] = { 1, 1, sizeof(ip4_addr) },
367 [RTA_OIF] = { 1, 1, sizeof(u32) },
368 [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
369 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
370 [RTA_PREFSRC] = { 1, 1, sizeof(ip4_addr) },
371 [RTA_METRICS] = { 1, 0, 0 },
372 [RTA_MULTIPATH] = { 1, 0, 0 },
373 [RTA_FLOW] = { 1, 1, sizeof(u32) },
374 [RTA_TABLE] = { 1, 1, sizeof(u32) },
375 [RTA_VIA] = { 1, 0, 0 },
376 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
377 [RTA_ENCAP] = { 1, 0, 0 },
378 };
379
380 static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = {
381 [RTA_DST] = { 1, 1, sizeof(ip6_addr) },
382 [RTA_SRC] = { 1, 1, sizeof(ip6_addr) },
383 [RTA_IIF] = { 1, 1, sizeof(u32) },
384 [RTA_OIF] = { 1, 1, sizeof(u32) },
385 [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) },
386 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
387 [RTA_PREFSRC] = { 1, 1, sizeof(ip6_addr) },
388 [RTA_METRICS] = { 1, 0, 0 },
389 [RTA_MULTIPATH] = { 1, 0, 0 },
390 [RTA_FLOW] = { 1, 1, sizeof(u32) },
391 [RTA_TABLE] = { 1, 1, sizeof(u32) },
392 [RTA_VIA] = { 1, 0, 0 },
393 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
394 [RTA_ENCAP] = { 1, 0, 0 },
395 };
396
397 #ifdef HAVE_MPLS_KERNEL
398 static struct nl_want_attrs rtm_attr_want_mpls[BIRD_RTA_MAX] = {
399 [RTA_DST] = { 1, 1, sizeof(u32) },
400 [RTA_IIF] = { 1, 1, sizeof(u32) },
401 [RTA_OIF] = { 1, 1, sizeof(u32) },
402 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
403 [RTA_METRICS] = { 1, 0, 0 },
404 [RTA_FLOW] = { 1, 1, sizeof(u32) },
405 [RTA_TABLE] = { 1, 1, sizeof(u32) },
406 [RTA_VIA] = { 1, 0, 0 },
407 [RTA_NEWDST] = { 1, 0, 0 },
408 };
409 #endif
410
411
412 static int
413 nl_parse_attrs(struct rtattr *a, struct nl_want_attrs *want, struct rtattr **k, int ksize)
414 {
415 int max = ksize / sizeof(struct rtattr *);
416 bzero(k, ksize);
417
418 for ( ; RTA_OK(a, nl_attr_len); a = RTA_NEXT(a, nl_attr_len))
419 {
420 if ((a->rta_type >= max) || !want[a->rta_type].defined)
421 continue;
422
423 if (want[a->rta_type].checksize && (RTA_PAYLOAD(a) != want[a->rta_type].size))
424 {
425 log(L_ERR "nl_parse_attrs: Malformed attribute received");
426 return 0;
427 }
428
429 k[a->rta_type] = a;
430 }
431
432 if (nl_attr_len)
433 {
434 log(L_ERR "nl_parse_attrs: remnant of size %d", nl_attr_len);
435 return 0;
436 }
437
438 return 1;
439 }
440
441 static inline u16 rta_get_u16(struct rtattr *a)
442 { return *(u16 *) RTA_DATA(a); }
443
444 static inline u32 rta_get_u32(struct rtattr *a)
445 { return *(u32 *) RTA_DATA(a); }
446
447 static inline ip4_addr rta_get_ip4(struct rtattr *a)
448 { return ip4_ntoh(*(ip4_addr *) RTA_DATA(a)); }
449
450 static inline ip6_addr rta_get_ip6(struct rtattr *a)
451 { return ip6_ntoh(*(ip6_addr *) RTA_DATA(a)); }
452
453 static inline ip_addr rta_get_ipa(struct rtattr *a)
454 {
455 if (RTA_PAYLOAD(a) == sizeof(ip4_addr))
456 return ipa_from_ip4(rta_get_ip4(a));
457 else
458 return ipa_from_ip6(rta_get_ip6(a));
459 }
460
461 #ifdef HAVE_MPLS_KERNEL
462 static inline ip_addr rta_get_via(struct rtattr *a)
463 {
464 struct rtvia *v = RTA_DATA(a);
465 switch(v->rtvia_family) {
466 case AF_INET: return ipa_from_ip4(ip4_ntoh(*(ip4_addr *) v->rtvia_addr));
467 case AF_INET6: return ipa_from_ip6(ip6_ntoh(*(ip6_addr *) v->rtvia_addr));
468 }
469 return IPA_NONE;
470 }
471
472 static u32 rta_mpls_stack[MPLS_MAX_LABEL_STACK];
473 static inline int rta_get_mpls(struct rtattr *a, u32 *stack)
474 {
475 if (!a)
476 return 0;
477
478 if (RTA_PAYLOAD(a) % 4)
479 log(L_WARN "KRT: Strange length of received MPLS stack: %u", RTA_PAYLOAD(a));
480
481 int labels = mpls_get(RTA_DATA(a), RTA_PAYLOAD(a) & ~0x3, stack);
482
483 if (labels < 0)
484 {
485 log(L_WARN "KRT: Too long MPLS stack received, ignoring");
486 labels = 0;
487 }
488
489 return labels;
490 }
491 #endif
492
493 struct rtattr *
494 nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint dlen)
495 {
496 uint pos = NLMSG_ALIGN(h->nlmsg_len);
497 uint len = RTA_LENGTH(dlen);
498
499 if (pos + len > bufsize)
500 bug("nl_add_attr: packet buffer overflow");
501
502 struct rtattr *a = (struct rtattr *)((char *)h + pos);
503 a->rta_type = code;
504 a->rta_len = len;
505 h->nlmsg_len = pos + len;
506
507 if (dlen > 0)
508 memcpy(RTA_DATA(a), data, dlen);
509
510 return a;
511 }
512
513 static inline struct rtattr *
514 nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code)
515 {
516 return nl_add_attr(h, bufsize, code, NULL, 0);
517 }
518
519 static inline void
520 nl_close_attr(struct nlmsghdr *h, struct rtattr *a)
521 {
522 a->rta_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)a;
523 }
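/*
 * Nested attributes are built by opening an attribute, adding its
 * sub-attributes and closing it again, which fixes up rta_len afterwards;
 * e.g. (as in nl_add_attr_mpls_encap() below):
 *
 *   struct rtattr *nest = nl_open_attr(h, bufsize, RTA_ENCAP);
 *   nl_add_attr_mpls(h, bufsize, RTA_DST, len, stack);
 *   nl_close_attr(h, nest);
 */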
524
525 static inline void
526 nl_add_attr_u16(struct nlmsghdr *h, uint bufsize, int code, u16 data)
527 {
528 nl_add_attr(h, bufsize, code, &data, 2);
529 }
530
531 static inline void
532 nl_add_attr_u32(struct nlmsghdr *h, uint bufsize, int code, u32 data)
533 {
534 nl_add_attr(h, bufsize, code, &data, 4);
535 }
536
537 static inline void
538 nl_add_attr_ip4(struct nlmsghdr *h, uint bufsize, int code, ip4_addr ip4)
539 {
540 ip4 = ip4_hton(ip4);
541 nl_add_attr(h, bufsize, code, &ip4, sizeof(ip4));
542 }
543
544 static inline void
545 nl_add_attr_ip6(struct nlmsghdr *h, uint bufsize, int code, ip6_addr ip6)
546 {
547 ip6 = ip6_hton(ip6);
548 nl_add_attr(h, bufsize, code, &ip6, sizeof(ip6));
549 }
550
551 static inline void
552 nl_add_attr_ipa(struct nlmsghdr *h, uint bufsize, int code, ip_addr ipa)
553 {
554 if (ipa_is_ip4(ipa))
555 nl_add_attr_ip4(h, bufsize, code, ipa_to_ip4(ipa));
556 else
557 nl_add_attr_ip6(h, bufsize, code, ipa_to_ip6(ipa));
558 }
559
560 #ifdef HAVE_MPLS_KERNEL
561 static inline void
562 nl_add_attr_mpls(struct nlmsghdr *h, uint bufsize, int code, int len, u32 *stack)
563 {
564 char buf[len*4];
565 mpls_put(buf, len, stack);
566 nl_add_attr(h, bufsize, code, buf, len*4);
567 }
568
569 static inline void
570 nl_add_attr_mpls_encap(struct nlmsghdr *h, uint bufsize, int len, u32 *stack)
571 {
572 nl_add_attr_u16(h, bufsize, RTA_ENCAP_TYPE, LWTUNNEL_ENCAP_MPLS);
573
574 struct rtattr *nest = nl_open_attr(h, bufsize, RTA_ENCAP);
575 nl_add_attr_mpls(h, bufsize, RTA_DST, len, stack);
576 nl_close_attr(h, nest);
577 }
578
579 static inline void
580 nl_add_attr_via(struct nlmsghdr *h, uint bufsize, ip_addr ipa)
581 {
582 struct rtvia *via = alloca(sizeof(struct rtvia) + 16);
583
584 if (ipa_is_ip4(ipa))
585 {
586 via->rtvia_family = AF_INET;
587 put_ip4(via->rtvia_addr, ipa_to_ip4(ipa));
588 nl_add_attr(h, bufsize, RTA_VIA, via, sizeof(struct rtvia) + 4);
589 }
590 else
591 {
592 via->rtvia_family = AF_INET6;
593 put_ip6(via->rtvia_addr, ipa_to_ip6(ipa));
594 nl_add_attr(h, bufsize, RTA_VIA, via, sizeof(struct rtvia) + 16);
595 }
596 }
597 #endif
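/*
 * RTA_VIA carries a gateway whose address family differs from that of the
 * route itself (e.g. an IPv4 or IPv6 next hop of an MPLS route);
 * nl_add_nexthop() below chooses between RTA_GATEWAY and RTA_VIA accordingly.
 */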
598
599 static inline struct rtnexthop *
600 nl_open_nexthop(struct nlmsghdr *h, uint bufsize)
601 {
602 uint pos = NLMSG_ALIGN(h->nlmsg_len);
603 uint len = RTNH_LENGTH(0);
604
605 if (pos + len > bufsize)
606 bug("nl_open_nexthop: packet buffer overflow");
607
608 h->nlmsg_len = pos + len;
609
610 return (void *)h + pos;
611 }
612
613 static inline void
614 nl_close_nexthop(struct nlmsghdr *h, struct rtnexthop *nh)
615 {
616 nh->rtnh_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)nh;
617 }
618
619 static inline void
620 nl_add_nexthop(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af UNUSED)
621 {
622 #ifdef HAVE_MPLS_KERNEL
623 if (nh->labels > 0)
624 if (af == AF_MPLS)
625 nl_add_attr_mpls(h, bufsize, RTA_NEWDST, nh->labels, nh->label);
626 else
627 nl_add_attr_mpls_encap(h, bufsize, nh->labels, nh->label);
628
629 if (ipa_nonzero(nh->gw))
630 {
631 if (af == (ipa_is_ip4(nh->gw) ? AF_INET : AF_INET6))
632 nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
633 else
634 nl_add_attr_via(h, bufsize, nh->gw);
635 }
636 #else
637
638 if (ipa_nonzero(nh->gw))
639 nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
640 #endif
641 }
642
643 static void
644 nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af)
645 {
646 struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH);
647
648 for (; nh; nh = nh->next)
649 {
650 struct rtnexthop *rtnh = nl_open_nexthop(h, bufsize);
651
652 rtnh->rtnh_flags = 0;
653 rtnh->rtnh_hops = nh->weight;
654 rtnh->rtnh_ifindex = nh->iface->index;
655
656 nl_add_nexthop(h, bufsize, nh, af);
657
658 if (nh->flags & RNF_ONLINK)
659 rtnh->rtnh_flags |= RTNH_F_ONLINK;
660
661 nl_close_nexthop(h, rtnh);
662 }
663
664 nl_close_attr(h, a);
665 }
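/*
 * The resulting RTA_MULTIPATH attribute is a sequence of rtnexthop headers,
 * each followed by the per-nexthop attributes added by nl_add_nexthop();
 * roughly:
 *
 *   RTA_MULTIPATH
 *     rtnexthop { rtnh_ifindex, rtnh_hops, rtnh_flags }
 *       RTA_GATEWAY or RTA_VIA [ RTA_ENCAP_TYPE + RTA_ENCAP ]
 *     rtnexthop { ... }
 *       ...
 *
 * nl_parse_multipath() below walks the same layout in the opposite direction.
 */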
666
667 static struct nexthop *
668 nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr *ra, int af)
669 {
670 struct rtattr *a[BIRD_RTA_MAX];
671 struct rtnexthop *nh = RTA_DATA(ra);
672 struct nexthop *rv, *first, **last;
673 unsigned len = RTA_PAYLOAD(ra);
674
675 first = NULL;
676 last = &first;
677
678 while (len)
679 {
680 /* Use RTNH_OK(nh,len) ?? */
681 if ((len < sizeof(*nh)) || (len < nh->rtnh_len))
682 return NULL;
683
684 *last = rv = lp_allocz(s->pool, NEXTHOP_MAX_SIZE);
685 last = &(rv->next);
686
687 rv->weight = nh->rtnh_hops;
688 rv->iface = if_find_by_index(nh->rtnh_ifindex);
689 if (!rv->iface)
690 return NULL;
691
692 /* Nonexistent RTNH_PAYLOAD ?? */
693 nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0);
694 switch (af)
695 {
696 case AF_INET:
697 if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want4, a, sizeof(a)))
698 return NULL;
699 break;
700
701 case AF_INET6:
702 if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want6, a, sizeof(a)))
703 return NULL;
704 break;
705
706 default:
707 return NULL;
708 }
709
710 if (a[RTA_GATEWAY])
711 rv->gw = rta_get_ipa(a[RTA_GATEWAY]);
712
713 #ifdef HAVE_MPLS_KERNEL
714 if (a[RTA_VIA])
715 rv->gw = rta_get_via(a[RTA_VIA]);
716 #endif
717
718 if (ipa_nonzero(rv->gw))
719 {
720 if (nh->rtnh_flags & RTNH_F_ONLINK)
721 rv->flags |= RNF_ONLINK;
722
723 neighbor *nbr;
724 nbr = neigh_find(&p->p, rv->gw, rv->iface,
725 (rv->flags & RNF_ONLINK) ? NEF_ONLINK : 0);
726 if (!nbr || (nbr->scope == SCOPE_HOST))
727 return NULL;
728 }
729
730 #ifdef HAVE_MPLS_KERNEL
731 if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE])
732 {
733 if (rta_get_u16(a[RTA_ENCAP_TYPE]) != LWTUNNEL_ENCAP_MPLS) {
734 log(L_WARN "KRT: Unknown encapsulation method %d in multipath", rta_get_u16(a[RTA_ENCAP_TYPE]));
735 return NULL;
736 }
737
738 struct rtattr *enca[BIRD_RTA_MAX];
739 nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
740 nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
741 rv->labels = rta_get_mpls(enca[RTA_DST], rv->label);
742 }
743 #endif
744
745
746 len -= NLMSG_ALIGN(nh->rtnh_len);
747 nh = RTNH_NEXT(nh);
748 }
749
750 /* Ensure nexthops are sorted to satisfy nest invariant */
751 if (!nexthop_is_sorted(first))
752 first = nexthop_sort(first);
753
754 return first;
755 }
756
757 static void
758 nl_add_metrics(struct nlmsghdr *h, uint bufsize, u32 *metrics, int max)
759 {
760 struct rtattr *a = nl_open_attr(h, bufsize, RTA_METRICS);
761 int t;
762
763 for (t = 1; t < max; t++)
764 if (metrics[0] & (1 << t))
765 nl_add_attr_u32(h, bufsize, t, metrics[t]);
766
767 nl_close_attr(h, a);
768 }
769
770 static int
771 nl_parse_metrics(struct rtattr *hdr, u32 *metrics, int max)
772 {
773 struct rtattr *a = RTA_DATA(hdr);
774 int len = RTA_PAYLOAD(hdr);
775
776 metrics[0] = 0;
777 for (; RTA_OK(a, len); a = RTA_NEXT(a, len))
778 {
779 if (a->rta_type == RTA_UNSPEC)
780 continue;
781
782 if (a->rta_type >= max)
783 continue;
784
785 if (RTA_PAYLOAD(a) != 4)
786 return -1;
787
788 metrics[0] |= 1 << a->rta_type;
789 metrics[a->rta_type] = rta_get_u32(a);
790 }
791
792 if (len > 0)
793 return -1;
794
795 return 0;
796 }
797
798
799 /*
800 * Scanning of interfaces
801 */
802
803 static void
804 nl_parse_link(struct nlmsghdr *h, int scan)
805 {
806 struct ifinfomsg *i;
807 struct rtattr *a[BIRD_IFLA_MAX];
808 int new = h->nlmsg_type == RTM_NEWLINK;
809 struct iface f = {};
810 struct iface *ifi;
811 char *name;
812 u32 mtu, master = 0;
813 uint fl;
814
815 if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFLA_RTA(i), ifla_attr_want, a, sizeof(a)))
816 return;
817 if (!a[IFLA_IFNAME] || (RTA_PAYLOAD(a[IFLA_IFNAME]) < 2) || !a[IFLA_MTU])
818 {
819 /*
820 * IFLA_IFNAME and IFLA_MTU are in fact required, but a message with
821 * IFLA_WIRELESS set may arrive without them (e.g. with no IFLA_IFNAME at all).
822 * We silently ignore all such IFLA_WIRELESS messages.
823 */
824
825 if (a[IFLA_WIRELESS])
826 return;
827
828 log(L_ERR "KIF: Malformed message received");
829 return;
830 }
831
832 name = RTA_DATA(a[IFLA_IFNAME]);
833 mtu = rta_get_u32(a[IFLA_MTU]);
834
835 if (a[IFLA_MASTER])
836 master = rta_get_u32(a[IFLA_MASTER]);
837
838 ifi = if_find_by_index(i->ifi_index);
839 if (!new)
840 {
841 DBG("KIF: IF%d(%s) goes down\n", i->ifi_index, name);
842 if (!ifi)
843 return;
844
845 if_delete(ifi);
846 }
847 else
848 {
849 DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i->ifi_index, name, mtu, i->ifi_flags);
850 if (ifi && strncmp(ifi->name, name, sizeof(ifi->name)-1))
851 if_delete(ifi);
852
853 strncpy(f.name, name, sizeof(f.name)-1);
854 f.index = i->ifi_index;
855 f.mtu = mtu;
856
857 f.master_index = master;
858 f.master = if_find_by_index(master);
859
860 fl = i->ifi_flags;
861 if (fl & IFF_UP)
862 f.flags |= IF_ADMIN_UP;
863 if (fl & IFF_LOWER_UP)
864 f.flags |= IF_LINK_UP;
865 if (fl & IFF_LOOPBACK) /* Loopback */
866 f.flags |= IF_MULTIACCESS | IF_LOOPBACK | IF_IGNORE;
867 else if (fl & IFF_POINTOPOINT) /* PtP */
868 f.flags |= IF_MULTICAST;
869 else if (fl & IFF_BROADCAST) /* Broadcast */
870 f.flags |= IF_MULTIACCESS | IF_BROADCAST | IF_MULTICAST;
871 else
872 f.flags |= IF_MULTIACCESS; /* NBMA */
873
874 if (fl & IFF_MULTICAST)
875 f.flags |= IF_MULTICAST;
876
877 ifi = if_update(&f);
878
879 if (!scan)
880 if_end_partial_update(ifi);
881 }
882 }
883
884 static void
885 nl_parse_addr4(struct ifaddrmsg *i, int scan, int new)
886 {
887 struct rtattr *a[BIRD_IFA_MAX];
888 struct iface *ifi;
889 u32 ifa_flags;
890 int scope;
891
892 if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a)))
893 return;
894
895 if (!a[IFA_LOCAL])
896 {
897 log(L_ERR "KIF: Malformed message received (missing IFA_LOCAL)");
898 return;
899 }
900 if (!a[IFA_ADDRESS])
901 {
902 log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
903 return;
904 }
905
906 ifi = if_find_by_index(i->ifa_index);
907 if (!ifi)
908 {
909 log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
910 return;
911 }
912
913 if (a[IFA_FLAGS])
914 ifa_flags = rta_get_u32(a[IFA_FLAGS]);
915 else
916 ifa_flags = i->ifa_flags;
917
918 struct ifa ifa;
919 bzero(&ifa, sizeof(ifa));
920 ifa.iface = ifi;
921 if (ifa_flags & IFA_F_SECONDARY)
922 ifa.flags |= IA_SECONDARY;
923
924 ifa.ip = rta_get_ipa(a[IFA_LOCAL]);
925
926 if (i->ifa_prefixlen > IP4_MAX_PREFIX_LENGTH)
927 {
928 log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
929 new = 0;
930 }
931 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH)
932 {
933 ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
934 net_fill_ip4(&ifa.prefix, rta_get_ip4(a[IFA_ADDRESS]), i->ifa_prefixlen);
935
936 /* It is either a host address or a peer address */
937 if (ipa_equal(ifa.ip, ifa.brd))
938 ifa.flags |= IA_HOST;
939 else
940 {
941 ifa.flags |= IA_PEER;
942 ifa.opposite = ifa.brd;
943 }
944 }
945 else
946 {
947 net_fill_ip4(&ifa.prefix, ipa_to_ip4(ifa.ip), i->ifa_prefixlen);
948 net_normalize(&ifa.prefix);
949
950 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 1)
951 ifa.opposite = ipa_opposite_m1(ifa.ip);
952
953 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 2)
954 ifa.opposite = ipa_opposite_m2(ifa.ip);
955
956 if ((ifi->flags & IF_BROADCAST) && a[IFA_BROADCAST])
957 {
958 ip4_addr xbrd = rta_get_ip4(a[IFA_BROADCAST]);
959 ip4_addr ybrd = ip4_or(ipa_to_ip4(ifa.ip), ip4_not(ip4_mkmask(i->ifa_prefixlen)));
960
961 if (ip4_equal(xbrd, net4_prefix(&ifa.prefix)) || ip4_equal(xbrd, ybrd))
962 ifa.brd = ipa_from_ip4(xbrd);
963 else if (ifi->flags & IF_TMP_DOWN) /* Complain only during the first scan */
964 {
965 log(L_ERR "KIF: Invalid broadcast address %I4 for %s", xbrd, ifi->name);
966 ifa.brd = ipa_from_ip4(ybrd);
967 }
968 }
969 }
970
971 scope = ipa_classify(ifa.ip);
972 if (scope < 0)
973 {
974 log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
975 return;
976 }
977 ifa.scope = scope & IADDR_SCOPE_MASK;
978
979 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
980 ifi->index, ifi->name,
981 new ? "added" : "removed",
982 ifa.ip, ifa.flags, &ifa.prefix, ifa.brd, ifa.opposite);
983
984 if (new)
985 ifa_update(&ifa);
986 else
987 ifa_delete(&ifa);
988
989 if (!scan)
990 if_end_partial_update(ifi);
991 }
992
993 static void
994 nl_parse_addr6(struct ifaddrmsg *i, int scan, int new)
995 {
996 struct rtattr *a[BIRD_IFA_MAX];
997 struct iface *ifi;
998 u32 ifa_flags;
999 int scope;
1000
1001 if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a)))
1002 return;
1003
1004 if (!a[IFA_ADDRESS])
1005 {
1006 log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
1007 return;
1008 }
1009
1010 ifi = if_find_by_index(i->ifa_index);
1011 if (!ifi)
1012 {
1013 log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
1014 return;
1015 }
1016
1017 if (a[IFA_FLAGS])
1018 ifa_flags = rta_get_u32(a[IFA_FLAGS]);
1019 else
1020 ifa_flags = i->ifa_flags;
1021
1022 struct ifa ifa;
1023 bzero(&ifa, sizeof(ifa));
1024 ifa.iface = ifi;
1025 if (ifa_flags & IFA_F_SECONDARY)
1026 ifa.flags |= IA_SECONDARY;
1027
1028 /* Ignore tentative addresses silently */
1029 if (ifa_flags & IFA_F_TENTATIVE)
1030 return;
1031
1032 /* IFA_LOCAL can be unset for IPv6 interfaces */
1033 ifa.ip = rta_get_ipa(a[IFA_LOCAL] ? : a[IFA_ADDRESS]);
1034
1035 if (i->ifa_prefixlen > IP6_MAX_PREFIX_LENGTH)
1036 {
1037 log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
1038 new = 0;
1039 }
1040 if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH)
1041 {
1042 ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
1043 net_fill_ip6(&ifa.prefix, rta_get_ip6(a[IFA_ADDRESS]), i->ifa_prefixlen);
1044
1045 /* It is either a host address or a peer address */
1046 if (ipa_equal(ifa.ip, ifa.brd))
1047 ifa.flags |= IA_HOST;
1048 else
1049 {
1050 ifa.flags |= IA_PEER;
1051 ifa.opposite = ifa.brd;
1052 }
1053 }
1054 else
1055 {
1056 net_fill_ip6(&ifa.prefix, ipa_to_ip6(ifa.ip), i->ifa_prefixlen);
1057 net_normalize(&ifa.prefix);
1058
1059 if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH - 1)
1060 ifa.opposite = ipa_opposite_m1(ifa.ip);
1061 }
1062
1063 scope = ipa_classify(ifa.ip);
1064 if (scope < 0)
1065 {
1066 log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
1067 return;
1068 }
1069 ifa.scope = scope & IADDR_SCOPE_MASK;
1070
1071 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
1072 ifi->index, ifi->name,
1073 new ? "added" : "removed",
1074 ifa.ip, ifa.flags, &ifa.prefix, ifa.brd, ifa.opposite);
1075
1076 if (new)
1077 ifa_update(&ifa);
1078 else
1079 ifa_delete(&ifa);
1080
1081 if (!scan)
1082 if_end_partial_update(ifi);
1083 }
1084
1085 static void
1086 nl_parse_addr(struct nlmsghdr *h, int scan)
1087 {
1088 struct ifaddrmsg *i;
1089
1090 if (!(i = nl_checkin(h, sizeof(*i))))
1091 return;
1092
1093 int new = (h->nlmsg_type == RTM_NEWADDR);
1094
1095 switch (i->ifa_family)
1096 {
1097 case AF_INET:
1098 return nl_parse_addr4(i, scan, new);
1099
1100 case AF_INET6:
1101 return nl_parse_addr6(i, scan, new);
1102 }
1103 }
1104
1105 void
1106 kif_do_scan(struct kif_proto *p UNUSED)
1107 {
1108 struct nlmsghdr *h;
1109
1110 if_start_update();
1111
1112 nl_request_dump(AF_UNSPEC, RTM_GETLINK);
1113 while (h = nl_get_scan())
1114 if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)
1115 nl_parse_link(h, 1);
1116 else
1117 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
1118
1119 /* Re-resolve master interface for slaves */
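/* A slave interface may appear in the dump before its master, in which case
   the master lookup in nl_parse_link() came up empty; fix that up here. */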
1120 struct iface *i;
1121 WALK_LIST(i, iface_list)
1122 if (i->master_index)
1123 {
1124 struct iface f = {
1125 .flags = i->flags,
1126 .mtu = i->mtu,
1127 .index = i->index,
1128 .master_index = i->master_index,
1129 .master = if_find_by_index(i->master_index)
1130 };
1131
1132 if (f.master != i->master)
1133 {
1134 memcpy(f.name, i->name, sizeof(f.name));
1135 if_update(&f);
1136 }
1137 }
1138
1139 nl_request_dump(AF_INET, RTM_GETADDR);
1140 while (h = nl_get_scan())
1141 if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
1142 nl_parse_addr(h, 1);
1143 else
1144 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
1145
1146 nl_request_dump(AF_INET6, RTM_GETADDR);
1147 while (h = nl_get_scan())
1148 if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
1149 nl_parse_addr(h, 1);
1150 else
1151 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
1152
1153 if_end_update();
1154 }
1155
1156 /*
1157 * Routes
1158 */
1159
1160 static inline u32
1161 krt_table_id(struct krt_proto *p)
1162 {
1163 return KRT_CF->sys.table_id;
1164 }
1165
1166 static HASH(struct krt_proto) nl_table_map;
1167
1168 #define RTH_KEY(p) p->af, krt_table_id(p)
1169 #define RTH_NEXT(p) p->sys.hash_next
1170 #define RTH_EQ(a1,i1,a2,i2) a1 == a2 && i1 == i2
1171 #define RTH_FN(a,i) a ^ u32_hash(i)
1172
1173 #define RTH_REHASH rth_rehash
1174 #define RTH_PARAMS /8, *2, 2, 2, 6, 20
1175
1176 HASH_DEFINE_REHASH_FN(RTH, struct krt_proto)
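/*
 * nl_table_map maps (address family, kernel table ID) to the owning
 * krt_proto; nl_parse_route() uses it to find the protocol for a received
 * route and krt_sys_start() uses it to detect duplicate registrations.
 */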
1177
1178 int
1179 krt_capable(rte *e)
1180 {
1181 rta *a = e->attrs;
1182
1183 switch (a->dest)
1184 {
1185 case RTD_UNICAST:
1186 case RTD_BLACKHOLE:
1187 case RTD_UNREACHABLE:
1188 case RTD_PROHIBIT:
1189 return 1;
1190
1191 default:
1192 return 0;
1193 }
1194 }
1195
1196 static inline int
1197 nh_bufsize(struct nexthop *nh)
1198 {
1199 int rv = 0;
1200 for (; nh != NULL; nh = nh->next)
1201 rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr)));
1202 return rv;
1203 }
1204
1205 static int
1206 nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh)
1207 {
1208 eattr *ea;
1209 net *net = e->net;
1210 rta *a = e->attrs;
1211 ea_list *eattrs = a->eattrs;
1212 int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(&(a->nh));
1213 u32 priority = 0;
1214
1215 struct {
1216 struct nlmsghdr h;
1217 struct rtmsg r;
1218 char buf[0];
1219 } *r;
1220
1221 int rsize = sizeof(*r) + bufsize;
1222 r = alloca(rsize);
1223
1224 DBG("nl_send_route(%N,op=%x)\n", net->n.addr, op);
1225
1226 bzero(&r->h, sizeof(r->h));
1227 bzero(&r->r, sizeof(r->r));
1228 r->h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE;
1229 r->h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1230 r->h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK;
1231
1232 r->r.rtm_family = p->af;
1233 r->r.rtm_dst_len = net_pxlen(net->n.addr);
1234 r->r.rtm_protocol = RTPROT_BIRD;
1235 r->r.rtm_scope = RT_SCOPE_NOWHERE;
1236 #ifdef HAVE_MPLS_KERNEL
1237 if (p->af == AF_MPLS)
1238 {
1239 /*
1240 * Kernel MPLS code is a bit picky. We must:
1241 * 1) Always set RT_SCOPE_UNIVERSE and RTN_UNICAST (even for RTM_DELROUTE)
1242 * 2) Never use RTA_PRIORITY
1243 */
1244
1245 u32 label = net_mpls(net->n.addr);
1246 nl_add_attr_mpls(&r->h, rsize, RTA_DST, 1, &label);
1247 r->r.rtm_scope = RT_SCOPE_UNIVERSE;
1248 r->r.rtm_type = RTN_UNICAST;
1249 }
1250 else
1251 #endif
1252 {
1253 nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(net->n.addr));
1254
1255 /* Add source address for IPv6 SADR routes */
1256 if (net->n.addr->type == NET_IP6_SADR)
1257 {
1258 net_addr_ip6_sadr *a = (void *) &net->n.addr;
1259 nl_add_attr_ip6(&r->h, rsize, RTA_SRC, a->src_prefix);
1260 r->r.rtm_src_len = a->src_pxlen;
1261 }
1262 }
1263
1264 /*
1265 * Strange behavior for RTM_DELROUTE:
1266 * 1) rtm_family is ignored in IPv6, works for IPv4
1267 * 2) not setting RTA_PRIORITY is different from setting default value (on IPv6)
1268 * 3) not setting RTA_PRIORITY is equivalent to setting 0, which is wildcard
1269 */
1270
1271 if (krt_table_id(p) < 256)
1272 r->r.rtm_table = krt_table_id(p);
1273 else
1274 nl_add_attr_u32(&r->h, rsize, RTA_TABLE, krt_table_id(p));
1275
1276 if (p->af == AF_MPLS)
1277 priority = 0;
1278 else if (a->source == RTS_DUMMY)
1279 priority = e->u.krt.metric;
1280 else if (KRT_CF->sys.metric)
1281 priority = KRT_CF->sys.metric;
1282 else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC)))
1283 priority = ea->u.data;
1284
1285 if (priority)
1286 nl_add_attr_u32(&r->h, rsize, RTA_PRIORITY, priority);
1287
1288 /* For route delete, we do not specify remaining route attributes */
1289 if (op == NL_OP_DELETE)
1290 goto dest;
1291
1292 /* Default scope is LINK for device routes, UNIVERSE otherwise */
1293 if (p->af == AF_MPLS)
1294 r->r.rtm_scope = RT_SCOPE_UNIVERSE;
1295 else if (ea = ea_find(eattrs, EA_KRT_SCOPE))
1296 r->r.rtm_scope = ea->u.data;
1297 else
1298 r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
1299
1300 if (ea = ea_find(eattrs, EA_KRT_PREFSRC))
1301 nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data);
1302
1303 if (ea = ea_find(eattrs, EA_KRT_REALM))
1304 nl_add_attr_u32(&r->h, rsize, RTA_FLOW, ea->u.data);
1305
1306
1307 u32 metrics[KRT_METRICS_MAX];
1308 metrics[0] = 0;
1309
1310 struct ea_walk_state ews = { .eattrs = eattrs };
1311 while (ea = ea_walk(&ews, EA_KRT_METRICS, KRT_METRICS_MAX))
1312 {
1313 int id = ea->id - EA_KRT_METRICS;
1314 metrics[0] |= 1 << id;
1315 metrics[id] = ea->u.data;
1316 }
1317
1318 if (metrics[0])
1319 nl_add_metrics(&r->h, rsize, metrics, KRT_METRICS_MAX);
1320
1321
1322 dest:
1323 switch (dest)
1324 {
1325 case RTD_UNICAST:
1326 r->r.rtm_type = RTN_UNICAST;
1327 if (nh->next && !krt_ecmp6(p))
1328 nl_add_multipath(&r->h, rsize, nh, p->af);
1329 else
1330 {
1331 nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->iface->index);
1332 nl_add_nexthop(&r->h, rsize, nh, p->af);
1333
1334 if (nh->flags & RNF_ONLINK)
1335 r->r.rtm_flags |= RTNH_F_ONLINK;
1336 }
1337 break;
1338 case RTD_BLACKHOLE:
1339 r->r.rtm_type = RTN_BLACKHOLE;
1340 break;
1341 case RTD_UNREACHABLE:
1342 r->r.rtm_type = RTN_UNREACHABLE;
1343 break;
1344 case RTD_PROHIBIT:
1345 r->r.rtm_type = RTN_PROHIBIT;
1346 break;
1347 case RTD_NONE:
1348 break;
1349 default:
1350 bug("krt_capable inconsistent with nl_send_route");
1351 }
1352
1353 /* Ignore missing for DELETE */
1354 return nl_exchange(&r->h, (op == NL_OP_DELETE));
1355 }
1356
1357 static inline int
1358 nl_add_rte(struct krt_proto *p, rte *e)
1359 {
1360 rta *a = e->attrs;
1361 int err = 0;
1362
1363 if (krt_ecmp6(p) && a->nh.next)
1364 {
1365 struct nexthop *nh = &(a->nh);
1366
1367 err = nl_send_route(p, e, NL_OP_ADD, RTD_UNICAST, nh);
1368 if (err < 0)
1369 return err;
1370
1371 for (nh = nh->next; nh; nh = nh->next)
1372 err += nl_send_route(p, e, NL_OP_APPEND, RTD_UNICAST, nh);
1373
1374 return err;
1375 }
1376
1377 return nl_send_route(p, e, NL_OP_ADD, a->dest, &(a->nh));
1378 }
1379
1380 static inline int
1381 nl_delete_rte(struct krt_proto *p, rte *e)
1382 {
1383 int err = 0;
1384
1385 /* For IPv6, we just repeatedly request DELETE until we get error */
1386 do
1387 err = nl_send_route(p, e, NL_OP_DELETE, RTD_NONE, NULL);
1388 while (krt_ecmp6(p) && !err);
1389
1390 return err;
1391 }
1392
1393 static inline int
1394 nl_replace_rte(struct krt_proto *p, rte *e)
1395 {
1396 rta *a = e->attrs;
1397 return nl_send_route(p, e, NL_OP_REPLACE, a->dest, &(a->nh));
1398 }
1399
1400
1401 void
1402 krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old)
1403 {
1404 int err = 0;
1405
1406 /*
1407 * We use NL_OP_REPLACE for IPv4; it has the issue of not checking for a
1408 * matching rtm_protocol, but that is OK when a dedicated priority is used.
1409 *
1410 * We do not use NL_OP_REPLACE for IPv6, as it has broken semantics for ECMP
1411 * and with some kernel versions an ECMP replace crashes the kernel. Using it
1412 * would need more testing and kernel version checks.
1413 *
1414 * For IPv6, we use NL_OP_DELETE and then NL_OP_ADD. We also do not trust the
1415 * old route value, so we do not try to optimize IPv6 ECMP reconfigurations.
1416 */
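/*
 * The resulting operation sequences, per the helpers above, are:
 *   IPv4 modify:        NL_OP_REPLACE
 *   IPv6 modify (ECMP): NL_OP_DELETE repeated until it fails, then
 *                       NL_OP_ADD for the first next hop and NL_OP_APPEND
 *                       for each additional one
 */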
1417
1418 if (krt_ipv4(p) && old && new)
1419 {
1420 err = nl_replace_rte(p, new);
1421 }
1422 else
1423 {
1424 if (old)
1425 nl_delete_rte(p, old);
1426
1427 if (new)
1428 err = nl_add_rte(p, new);
1429 }
1430
1431 if (err < 0)
1432 n->n.flags |= KRF_SYNC_ERROR;
1433 else
1434 n->n.flags &= ~KRF_SYNC_ERROR;
1435 }
1436
1437 static int
1438 nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint priority, uint krt_type, uint rtm_family)
1439 {
1440 /* Route merging is used for IPv6 scans */
1441 if (!s->scan || (rtm_family != AF_INET6))
1442 return 0;
1443
1444 /* Saved and new route must have same network, proto/table, and priority */
1445 if ((s->net != net) || (s->proto != p) || (s->krt_metric != priority))
1446 return 0;
1447
1448 /* Both must be regular unicast routes */
1449 if ((s->krt_type != RTN_UNICAST) || (krt_type != RTN_UNICAST))
1450 return 0;
1451
1452 return 1;
1453 }
1454
1455 static void
1456 nl_announce_route(struct nl_parse_state *s)
1457 {
1458 rte *e = rte_get_temp(s->attrs);
1459 e->net = s->net;
1460 e->u.krt.src = s->krt_src;
1461 e->u.krt.proto = s->krt_proto;
1462 e->u.krt.seen = 0;
1463 e->u.krt.best = 0;
1464 e->u.krt.metric = s->krt_metric;
1465
1466 if (s->scan)
1467 krt_got_route(s->proto, e);
1468 else
1469 krt_got_route_async(s->proto, e, s->new);
1470
1471 s->net = NULL;
1472 s->attrs = NULL;
1473 s->proto = NULL;
1474 lp_flush(s->pool);
1475 }
1476
1477 static inline void
1478 nl_parse_begin(struct nl_parse_state *s, int scan)
1479 {
1480 memset(s, 0, sizeof (struct nl_parse_state));
1481 s->pool = nl_linpool;
1482 s->scan = scan;
1483 }
1484
1485 static inline void
1486 nl_parse_end(struct nl_parse_state *s)
1487 {
1488 if (s->net)
1489 nl_announce_route(s);
1490 }
1491
1492
1493 #define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
1494
1495 static void
1496 nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
1497 {
1498 struct krt_proto *p;
1499 struct rtmsg *i;
1500 struct rtattr *a[BIRD_RTA_MAX];
1501 int new = h->nlmsg_type == RTM_NEWROUTE;
1502
1503 net_addr dst, src = {};
1504 u32 oif = ~0;
1505 u32 table_id;
1506 u32 priority = 0;
1507 u32 def_scope = RT_SCOPE_UNIVERSE;
1508 int krt_src;
1509
1510 if (!(i = nl_checkin(h, sizeof(*i))))
1511 return;
1512
1513 switch (i->rtm_family)
1514 {
1515 case AF_INET:
1516 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want4, a, sizeof(a)))
1517 return;
1518
1519 if (a[RTA_DST])
1520 net_fill_ip4(&dst, rta_get_ip4(a[RTA_DST]), i->rtm_dst_len);
1521 else
1522 net_fill_ip4(&dst, IP4_NONE, 0);
1523 break;
1524
1525 case AF_INET6:
1526 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a)))
1527 return;
1528
1529 if (a[RTA_DST])
1530 net_fill_ip6(&dst, rta_get_ip6(a[RTA_DST]), i->rtm_dst_len);
1531 else
1532 net_fill_ip6(&dst, IP6_NONE, 0);
1533
1534 if (a[RTA_SRC])
1535 net_fill_ip6(&src, rta_get_ip6(a[RTA_SRC]), i->rtm_src_len);
1536 else
1537 net_fill_ip6(&src, IP6_NONE, 0);
1538 break;
1539
1540 #ifdef HAVE_MPLS_KERNEL
1541 case AF_MPLS:
1542 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want_mpls, a, sizeof(a)))
1543 return;
1544
1545 if (!a[RTA_DST])
1546 SKIP("MPLS route without RTA_DST");
1547
1548 if (rta_get_mpls(a[RTA_DST], rta_mpls_stack) != 1)
1549 SKIP("MPLS route with multi-label RTA_DST");
1550
1551 net_fill_mpls(&dst, rta_mpls_stack[0]);
1552 break;
1553 #endif
1554
1555 default:
1556 return;
1557 }
1558
1559 if (a[RTA_OIF])
1560 oif = rta_get_u32(a[RTA_OIF]);
1561
1562 if (a[RTA_TABLE])
1563 table_id = rta_get_u32(a[RTA_TABLE]);
1564 else
1565 table_id = i->rtm_table;
1566
1567 /* Do we know this table? */
1568 p = HASH_FIND(nl_table_map, RTH, i->rtm_family, table_id);
1569 if (!p)
1570 SKIP("unknown table %u\n", table_id);
1571
1572 if (a[RTA_SRC] && (p->p.net_type != NET_IP6_SADR))
1573 SKIP("src prefix for non-SADR channel\n");
1574
1575 if (a[RTA_IIF])
1576 SKIP("IIF set\n");
1577
1578 if (i->rtm_tos != 0) /* We don't support TOS */
1579 SKIP("TOS %02x\n", i->rtm_tos);
1580
1581 if (s->scan && !new)
1582 SKIP("RTM_DELROUTE in scan\n");
1583
1584 if (a[RTA_PRIORITY])
1585 priority = rta_get_u32(a[RTA_PRIORITY]);
1586
1587 int c = net_classify(&dst);
1588 if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
1589 SKIP("strange class/scope\n");
1590
1591 switch (i->rtm_protocol)
1592 {
1593 case RTPROT_UNSPEC:
1594 SKIP("proto unspec\n");
1595
1596 case RTPROT_REDIRECT:
1597 krt_src = KRT_SRC_REDIRECT;
1598 break;
1599
1600 case RTPROT_KERNEL:
1601 krt_src = KRT_SRC_KERNEL;
1602 return;
1603
1604 case RTPROT_BIRD:
1605 if (!s->scan)
1606 SKIP("echo\n");
1607 krt_src = KRT_SRC_BIRD;
1608 break;
1609
1610 case RTPROT_BOOT:
1611 default:
1612 krt_src = KRT_SRC_ALIEN;
1613 }
1614
1615 net_addr *n = &dst;
1616 if (p->p.net_type == NET_IP6_SADR)
1617 {
1618 n = alloca(sizeof(net_addr_ip6_sadr));
1619 net_fill_ip6_sadr(n, net6_prefix(&dst), net6_pxlen(&dst),
1620 net6_prefix(&src), net6_pxlen(&src));
1621 }
1622
1623 net *net = net_get(p->p.main_channel->table, n);
1624
1625 if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type, i->rtm_family))
1626 nl_announce_route(s);
1627
1628 rta *ra = lp_allocz(s->pool, RTA_MAX_SIZE);
1629 ra->src = p->p.main_source;
1630 ra->source = RTS_INHERIT;
1631 ra->scope = SCOPE_UNIVERSE;
1632
1633 switch (i->rtm_type)
1634 {
1635 case RTN_UNICAST:
1636 ra->dest = RTD_UNICAST;
1637
1638 if (a[RTA_MULTIPATH])
1639 {
1640 struct nexthop *nh = nl_parse_multipath(s, p, a[RTA_MULTIPATH], i->rtm_family);
1641 if (!nh)
1642 {
1643 log(L_ERR "KRT: Received strange multipath route %N", net->n.addr);
1644 return;
1645 }
1646
1647 nexthop_link(ra, nh);
1648 break;
1649 }
1650
1651 ra->nh.iface = if_find_by_index(oif);
1652 if (!ra->nh.iface)
1653 {
1654 log(L_ERR "KRT: Received route %N with unknown ifindex %u", net->n.addr, oif);
1655 return;
1656 }
1657
1658 if (a[RTA_GATEWAY])
1659 ra->nh.gw = rta_get_ipa(a[RTA_GATEWAY]);
1660
1661 #ifdef HAVE_MPLS_KERNEL
1662 if (a[RTA_VIA])
1663 ra->nh.gw = rta_get_via(a[RTA_VIA]);
1664 #endif
1665
1666 if (ipa_nonzero(ra->nh.gw))
1667 {
1668 /* Silently skip strange 6to4 routes */
1669 const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96);
1670 if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->nh.gw, (net_addr *) &sit))
1671 return;
1672
1673 if (i->rtm_flags & RTNH_F_ONLINK)
1674 ra->nh.flags |= RNF_ONLINK;
1675
1676 neighbor *nbr;
1677 nbr = neigh_find(&p->p, ra->nh.gw, ra->nh.iface,
1678 (ra->nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0);
1679 if (!nbr || (nbr->scope == SCOPE_HOST))
1680 {
1681 log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr,
1682 ra->nh.gw);
1683 return;
1684 }
1685 }
1686
1687 break;
1688 case RTN_BLACKHOLE:
1689 ra->dest = RTD_BLACKHOLE;
1690 break;
1691 case RTN_UNREACHABLE:
1692 ra->dest = RTD_UNREACHABLE;
1693 break;
1694 case RTN_PROHIBIT:
1695 ra->dest = RTD_PROHIBIT;
1696 break;
1697 /* FIXME: What about RTN_THROW? */
1698 default:
1699 SKIP("type %d\n", i->rtm_type);
1700 return;
1701 }
1702
1703 #ifdef HAVE_MPLS_KERNEL
1704 if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !ra->nh.next)
1705 ra->nh.labels = rta_get_mpls(a[RTA_NEWDST], ra->nh.label);
1706
1707 if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !ra->nh.next)
1708 {
1709 switch (rta_get_u16(a[RTA_ENCAP_TYPE]))
1710 {
1711 case LWTUNNEL_ENCAP_MPLS:
1712 {
1713 struct rtattr *enca[BIRD_RTA_MAX];
1714 nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
1715 nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
1716 ra->nh.labels = rta_get_mpls(enca[RTA_DST], ra->nh.label);
1717 break;
1718 }
1719 default:
1720 SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE]));
1721 break;
1722 }
1723 }
1724 #endif
1725
1726 if (i->rtm_scope != def_scope)
1727 {
1728 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1729 ea->next = ra->eattrs;
1730 ra->eattrs = ea;
1731 ea->flags = EALF_SORTED;
1732 ea->count = 1;
1733 ea->attrs[0].id = EA_KRT_SCOPE;
1734 ea->attrs[0].flags = 0;
1735 ea->attrs[0].type = EAF_TYPE_INT;
1736 ea->attrs[0].u.data = i->rtm_scope;
1737 }
1738
1739 if (a[RTA_PREFSRC])
1740 {
1741 ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]);
1742
1743 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1744 ea->next = ra->eattrs;
1745 ra->eattrs = ea;
1746 ea->flags = EALF_SORTED;
1747 ea->count = 1;
1748 ea->attrs[0].id = EA_KRT_PREFSRC;
1749 ea->attrs[0].flags = 0;
1750 ea->attrs[0].type = EAF_TYPE_IP_ADDRESS;
1751
1752 struct adata *ad = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps));
1753 ad->length = sizeof(ps);
1754 memcpy(ad->data, &ps, sizeof(ps));
1755
1756 ea->attrs[0].u.ptr = ad;
1757 }
1758
1759 if (a[RTA_FLOW])
1760 {
1761 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1762 ea->next = ra->eattrs;
1763 ra->eattrs = ea;
1764 ea->flags = EALF_SORTED;
1765 ea->count = 1;
1766 ea->attrs[0].id = EA_KRT_REALM;
1767 ea->attrs[0].flags = 0;
1768 ea->attrs[0].type = EAF_TYPE_INT;
1769 ea->attrs[0].u.data = rta_get_u32(a[RTA_FLOW]);
1770 }
1771
1772 if (a[RTA_METRICS])
1773 {
1774 u32 metrics[KRT_METRICS_MAX];
1775 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr));
1776 int t, n = 0;
1777
1778 if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0)
1779 {
1780 log(L_ERR "KRT: Received route %N with strange RTA_METRICS attribute", net->n.addr);
1781 return;
1782 }
1783
1784 for (t = 1; t < KRT_METRICS_MAX; t++)
1785 if (metrics[0] & (1 << t))
1786 {
1787 ea->attrs[n].id = EA_CODE(PROTOCOL_KERNEL, KRT_METRICS_OFFSET + t);
1788 ea->attrs[n].flags = 0;
1789 ea->attrs[n].type = EAF_TYPE_INT; /* FIXME: Some are EAF_TYPE_BITFIELD */
1790 ea->attrs[n].u.data = metrics[t];
1791 n++;
1792 }
1793
1794 if (n > 0)
1795 {
1796 ea->next = ra->eattrs;
1797 ea->flags = EALF_SORTED;
1798 ea->count = n;
1799 ra->eattrs = ea;
1800 }
1801 }
1802
1803 /*
1804 * Ideally, now we would send the received route to the rest of kernel code.
1805 * But IPv6 ECMP routes before 4.11 are sent as a sequence of routes, so we
1806 * postpone it and merge next hops until the end of the sequence. Note that
1807 * when doing merging of next hops, we expect the new route to be unipath.
1808 * Otherwise, we ignore additional next hops in nexthop_insert().
1809 */
1810
1811 if (!s->net)
1812 {
1813 /* Store the new route */
1814 s->net = net;
1815 s->attrs = ra;
1816 s->proto = p;
1817 s->new = new;
1818 s->krt_src = krt_src;
1819 s->krt_type = i->rtm_type;
1820 s->krt_proto = i->rtm_protocol;
1821 s->krt_metric = priority;
1822 }
1823 else
1824 {
1825 /* Merge next hops with the stored route */
1826 rta *oa = s->attrs;
1827
1828 struct nexthop *nhs = &oa->nh;
1829 nexthop_insert(&nhs, &ra->nh);
1830
1831 /* Perhaps new nexthop is inserted at the first position */
1832 if (nhs == &ra->nh)
1833 {
1834 /* Swap rtas */
1835 s->attrs = ra;
1836
1837 /* Keep old eattrs */
1838 ra->eattrs = oa->eattrs;
1839 }
1840 }
1841 }
1842
1843 void
1844 krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
1845 {
1846 struct nlmsghdr *h;
1847 struct nl_parse_state s;
1848
1849 nl_parse_begin(&s, 1);
1850 nl_request_dump(AF_UNSPEC, RTM_GETROUTE);
1851 while (h = nl_get_scan())
1852 if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
1853 nl_parse_route(&s, h);
1854 else
1855 log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
1856 nl_parse_end(&s);
1857 }
1858
1859 /*
1860 * Asynchronous Netlink interface
1861 */
1862
1863 static sock *nl_async_sk; /* BIRD socket for asynchronous notifications */
1864 static byte *nl_async_rx_buffer; /* Receive buffer */
1865
1866 static void
1867 nl_async_msg(struct nlmsghdr *h)
1868 {
1869 struct nl_parse_state s;
1870
1871 switch (h->nlmsg_type)
1872 {
1873 case RTM_NEWROUTE:
1874 case RTM_DELROUTE:
1875 DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);
1876 nl_parse_begin(&s, 0);
1877 nl_parse_route(&s, h);
1878 nl_parse_end(&s);
1879 break;
1880 case RTM_NEWLINK:
1881 case RTM_DELLINK:
1882 DBG("KRT: Received async link notification (%d)\n", h->nlmsg_type);
1883 if (kif_proto)
1884 nl_parse_link(h, 0);
1885 break;
1886 case RTM_NEWADDR:
1887 case RTM_DELADDR:
1888 DBG("KRT: Received async address notification (%d)\n", h->nlmsg_type);
1889 if (kif_proto)
1890 nl_parse_addr(h, 0);
1891 break;
1892 default:
1893 DBG("KRT: Received unknown async notification (%d)\n", h->nlmsg_type);
1894 }
1895 }
1896
1897 static int
1898 nl_async_hook(sock *sk, uint size UNUSED)
1899 {
1900 struct iovec iov = { nl_async_rx_buffer, NL_RX_SIZE };
1901 struct sockaddr_nl sa;
1902 struct msghdr m = {
1903 .msg_name = &sa,
1904 .msg_namelen = sizeof(sa),
1905 .msg_iov = &iov,
1906 .msg_iovlen = 1,
1907 };
1908 struct nlmsghdr *h;
1909 int x;
1910 uint len;
1911
1912 x = recvmsg(sk->fd, &m, 0);
1913 if (x < 0)
1914 {
1915 if (errno == ENOBUFS)
1916 {
1917 /*
1918 * Netlink reports that some packets have been thrown away.
1919 * One day we might react to it by requesting a route table
1920 * scan in the near future.
1921 */
1922 log(L_WARN "Kernel dropped some netlink messages, will resync on next scan.");
1923 return 1; /* More data are likely to be ready */
1924 }
1925 else if (errno != EWOULDBLOCK)
1926 log(L_ERR "Netlink recvmsg: %m");
1927 return 0;
1928 }
1929 if (sa.nl_pid) /* It isn't from the kernel */
1930 {
1931 DBG("Non-kernel packet\n");
1932 return 1;
1933 }
1934 h = (void *) nl_async_rx_buffer;
1935 len = x;
1936 if (m.msg_flags & MSG_TRUNC)
1937 {
1938 log(L_WARN "Netlink got truncated asynchronous message");
1939 return 1;
1940 }
1941 while (NLMSG_OK(h, len))
1942 {
1943 nl_async_msg(h);
1944 h = NLMSG_NEXT(h, len);
1945 }
1946 if (len)
1947 log(L_WARN "nl_async_hook: Found packet remnant of size %d", len);
1948 return 1;
1949 }
1950
1951 static void
1952 nl_async_err_hook(sock *sk, int e UNUSED)
1953 {
1954 nl_async_hook(sk, 0);
1955 }
1956
1957 static void
1958 nl_open_async(void)
1959 {
1960 sock *sk;
1961 struct sockaddr_nl sa;
1962 int fd;
1963
1964 if (nl_async_sk)
1965 return;
1966
1967 DBG("KRT: Opening async netlink socket\n");
1968
1969 fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
1970 if (fd < 0)
1971 {
1972 log(L_ERR "Unable to open asynchronous rtnetlink socket: %m");
1973 return;
1974 }
1975
1976 bzero(&sa, sizeof(sa));
1977 sa.nl_family = AF_NETLINK;
1978 sa.nl_groups = RTMGRP_LINK |
1979 RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE |
1980 RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE;
1981
1982 if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
1983 {
1984 log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m");
1985 close(fd);
1986 return;
1987 }
1988
1989 nl_async_rx_buffer = xmalloc(NL_RX_SIZE);
1990
1991 sk = nl_async_sk = sk_new(krt_pool);
1992 sk->type = SK_MAGIC;
1993 sk->rx_hook = nl_async_hook;
1994 sk->err_hook = nl_async_err_hook;
1995 sk->fd = fd;
1996 if (sk_open(sk) < 0)
1997 bug("Netlink: sk_open failed");
1998 }
1999
2000
2001 /*
2002 * Interface to the UNIX krt module
2003 */
2004
2005 void
2006 krt_sys_io_init(void)
2007 {
2008 nl_linpool = lp_new_default(krt_pool);
2009 HASH_INIT(nl_table_map, krt_pool, 6);
2010 }
2011
2012 int
2013 krt_sys_start(struct krt_proto *p)
2014 {
2015 struct krt_proto *old = HASH_FIND(nl_table_map, RTH, p->af, krt_table_id(p));
2016
2017 if (old)
2018 {
2019 log(L_ERR "%s: Kernel table %u already registered by %s",
2020 p->p.name, krt_table_id(p), old->p.name);
2021 return 0;
2022 }
2023
2024 HASH_INSERT2(nl_table_map, RTH, krt_pool, p);
2025
2026 nl_open();
2027 nl_open_async();
2028
2029 return 1;
2030 }
2031
2032 void
2033 krt_sys_shutdown(struct krt_proto *p)
2034 {
2035 HASH_REMOVE2(nl_table_map, RTH, krt_pool, p);
2036 }
2037
2038 int
2039 krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
2040 {
2041 return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric);
2042 }
2043
2044 void
2045 krt_sys_init_config(struct krt_config *cf)
2046 {
2047 cf->sys.table_id = RT_TABLE_MAIN;
2048 cf->sys.metric = 32;
2049 }
2050
2051 void
2052 krt_sys_copy_config(struct krt_config *d, struct krt_config *s)
2053 {
2054 d->sys.table_id = s->sys.table_id;
2055 d->sys.metric = s->sys.metric;
2056 }
2057
2058 static const char *krt_metrics_names[KRT_METRICS_MAX] = {
2059 NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss",
2060 "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack"
2061 };
2062
2063 static const char *krt_features_names[KRT_FEATURES_MAX] = {
2064 "ecn", NULL, NULL, "allfrag"
2065 };
2066
2067 int
2068 krt_sys_get_attr(eattr *a, byte *buf, int buflen UNUSED)
2069 {
2070 switch (a->id)
2071 {
2072 case EA_KRT_PREFSRC:
2073 bsprintf(buf, "prefsrc");
2074 return GA_NAME;
2075
2076 case EA_KRT_REALM:
2077 bsprintf(buf, "realm");
2078 return GA_NAME;
2079
2080 case EA_KRT_SCOPE:
2081 bsprintf(buf, "scope");
2082 return GA_NAME;
2083
2084 case EA_KRT_LOCK:
2085 buf += bsprintf(buf, "lock:");
2086 ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX);
2087 return GA_FULL;
2088
2089 case EA_KRT_FEATURES:
2090 buf += bsprintf(buf, "features:");
2091 ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX);
2092 return GA_FULL;
2093
2094 default:;
2095 int id = (int)EA_ID(a->id) - KRT_METRICS_OFFSET;
2096 if (id > 0 && id < KRT_METRICS_MAX)
2097 {
2098 bsprintf(buf, "%s", krt_metrics_names[id]);
2099 return GA_NAME;
2100 }
2101
2102 return GA_UNKNOWN;
2103 }
2104 }
2105
2106
2107
2108 void
2109 kif_sys_start(struct kif_proto *p UNUSED)
2110 {
2111 nl_open();
2112 nl_open_async();
2113 }
2114
2115 void
2116 kif_sys_shutdown(struct kif_proto *p UNUSED)
2117 {
2118 }
2119
2120 int
2121 kif_update_sysdep_addr(struct iface *i UNUSED)
2122 {
2123 return 0;
2124 }