]> git.ipfire.org Git - thirdparty/bird.git/blob - sysdep/linux/netlink.c
834504d07b5d2ee4c81740c31c6084e6b922afd8
[thirdparty/bird.git] / sysdep / linux / netlink.c
1 /*
2 * BIRD -- Linux Netlink Interface
3 *
4 * (c) 1999--2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
9 #include <alloca.h>
10 #include <stdio.h>
11 #include <unistd.h>
12 #include <fcntl.h>
13 #include <sys/socket.h>
14 #include <sys/uio.h>
15 #include <errno.h>
16
17 #undef LOCAL_DEBUG
18
19 #include "nest/bird.h"
20 #include "nest/route.h"
21 #include "nest/protocol.h"
22 #include "nest/iface.h"
23 #include "lib/alloca.h"
24 #include "sysdep/unix/unix.h"
25 #include "sysdep/unix/krt.h"
26 #include "lib/socket.h"
27 #include "lib/string.h"
28 #include "lib/hash.h"
29 #include "conf/conf.h"
30
31 #include <asm/types.h>
32 #include <linux/if.h>
33 #include <linux/netlink.h>
34 #include <linux/rtnetlink.h>
35
36 #ifdef HAVE_MPLS_KERNEL
37 #include <linux/lwtunnel.h>
38 #endif
39
/*
 * Fallback definitions: each constant below is defined here only when the
 * system headers included above did not already provide it.
 */

#ifndef MSG_TRUNC			/* Hack: Several versions of glibc miss this one :( */
#define MSG_TRUNC 0x20
#endif

#ifndef IFA_FLAGS
#define IFA_FLAGS 8
#endif

#ifndef IFF_LOWER_UP
#define IFF_LOWER_UP 0x10000
#endif

#ifndef RTA_TABLE
#define RTA_TABLE 15
#endif

#ifndef RTA_VIA
#define RTA_VIA 18
#endif

#ifndef RTA_NEWDST
#define RTA_NEWDST 19
#endif

#ifndef RTA_ENCAP_TYPE
#define RTA_ENCAP_TYPE 21
#endif

#ifndef RTA_ENCAP
#define RTA_ENCAP 22
#endif

/* Nonzero when protocol instance p works with the AF_INET6 address family */
#define krt_ecmp6(p) ((p)->af == AF_INET6)

/* NOTE(review): presumably the default ECMP limit; it is consumed outside this file -- confirm */
const int rt_default_ecmp = 16;
75
76 /*
 * Structure nl_parse_state keeps the state of received route processing.
 * Ideally, we could just independently parse received Netlink messages and
 * immediately propagate received routes to the rest of BIRD, but older Linux
 * kernels (before version 4.11) represent and announce IPv6 ECMP routes not as
 * one route with multiple next hops (like RTA_MULTIPATH in IPv4 ECMP), but as a
 * sequence of routes with the same prefix. More recent kernels work as with IPv4.
 *
 * Therefore, BIRD keeps the currently processed route in the nl_parse_state
 * structure and postpones its propagation until we expect it to be final; i.e.,
 * when a non-matching route is received or when the scan ends. When another
 * matching route is received, it is merged with the already processed route to
 * form an ECMP route. Note that merging is done only for IPv6 (merge == 1), but
 * the postponing is done in both cases (for simplicity). All IPv4 routes or IPv6
 * routes with RTA_MULTIPATH set are just considered non-matching.
 *
 * This is ignored for asynchronous notifications (every notification is handled
 * as a separate route). It is not an issue for our routes, as we ignore such
 * notifications anyway. But importing alien IPv6 ECMP routes does not work
 * properly with older kernels.
96 *
97 * Whatever the kernel version is, IPv6 ECMP routes are sent as multiple routes
98 * for the same prefix.
99 */
100
/* Parser state shared by the route-parsing code; holds one postponed route */
struct nl_parse_state
{
  struct linpool *pool;		/* Pool from which parsed next hops are allocated */
  int scan;			/* NOTE(review): presumably set for a synchronous scan -- confirm */
  int merge;			/* Merge matching routes into one ECMP route (IPv6 only) */

  /*
   * Description of the currently postponed route. The exact meaning of the
   * individual fields is defined by code outside this part of the file.
   */
  net *net;
  rta *attrs;
  struct krt_proto *proto;
  s8 new;
  s8 krt_src;
  u8 krt_type;
  u8 krt_proto;
  u32 krt_metric;
};
116
117 /*
118 * Synchronous Netlink interface
119 */
120
/* One Netlink socket together with its receive state */
struct nl_sock
{
  int fd;			/* Socket descriptor, negative while the socket is not open */
  u32 seq;			/* Sequence number of the last request sent */
  byte *rx_buffer;		/* Receive buffer */
  struct nlmsghdr *last_hdr;	/* Recently received packet */
  uint last_size;		/* Number of received bytes not yet consumed from rx_buffer */
};
129
/* Size in bytes of the receive buffer allocated for every socket */
#define NL_RX_SIZE 8192

/*
 * Route operations. The value is ORed into nlmsg_flags of the request;
 * zero (NL_OP_DELETE) selects RTM_DELROUTE, anything else RTM_NEWROUTE.
 */
#define NL_OP_DELETE	0
#define NL_OP_ADD	(NLM_F_CREATE|NLM_F_EXCL)
#define NL_OP_REPLACE	(NLM_F_CREATE|NLM_F_REPLACE)
#define NL_OP_APPEND	(NLM_F_CREATE|NLM_F_APPEND)

/* NOTE(review): linpool used by code outside this part of the file -- confirm its purpose */
static linpool *nl_linpool;

static struct nl_sock nl_scan = {.fd = -1};	/* Netlink socket for synchronous scan */
static struct nl_sock nl_req  = {.fd = -1};	/* Netlink socket for requests */
141
142 static void
143 nl_open_sock(struct nl_sock *nl)
144 {
145 if (nl->fd < 0)
146 {
147 nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
148 if (nl->fd < 0)
149 die("Unable to open rtnetlink socket: %m");
150 nl->seq = (u32) (current_time() TO_S); /* Or perhaps random_u32() ? */
151 nl->rx_buffer = xmalloc(NL_RX_SIZE);
152 nl->last_hdr = NULL;
153 nl->last_size = 0;
154 }
155 }
156
157 static void
158 nl_open(void)
159 {
160 nl_open_sock(&nl_scan);
161 nl_open_sock(&nl_req);
162 }
163
164 static void
165 nl_send(struct nl_sock *nl, struct nlmsghdr *nh)
166 {
167 struct sockaddr_nl sa;
168
169 memset(&sa, 0, sizeof(sa));
170 sa.nl_family = AF_NETLINK;
171 nh->nlmsg_pid = 0;
172 nh->nlmsg_seq = ++(nl->seq);
173 if (sendto(nl->fd, nh, nh->nlmsg_len, 0, (struct sockaddr *)&sa, sizeof(sa)) < 0)
174 die("rtnetlink sendto: %m");
175 nl->last_hdr = NULL;
176 }
177
178 static void
179 nl_request_dump(int af, int cmd)
180 {
181 struct {
182 struct nlmsghdr nh;
183 struct rtgenmsg g;
184 } req = {
185 .nh.nlmsg_type = cmd,
186 .nh.nlmsg_len = sizeof(req),
187 .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
188 .g.rtgen_family = af
189 };
190 nl_send(&nl_scan, &req.nh);
191 }
192
/*
 * Return the next Netlink message from socket @nl whose sequence number
 * matches the last request sent on that socket (nl->seq).
 *
 * Messages are taken one by one from the receive buffer; when the buffer is
 * exhausted (nl->last_hdr is NULL), another datagram is read by recvmsg().
 * Datagrams with a nonzero sender port ID are skipped, out-of-sequence
 * messages are logged and skipped. The function dies on a receive error and
 * calls bug() on a truncated datagram; it never returns NULL.
 */
static struct nlmsghdr *
nl_get_reply(struct nl_sock *nl)
{
  for(;;)
    {
      if (!nl->last_hdr)
	{
	  /* Buffer is empty, read the next datagram */
	  struct iovec iov = { nl->rx_buffer, NL_RX_SIZE };
	  struct sockaddr_nl sa;
	  struct msghdr m = {
	    .msg_name = &sa,
	    .msg_namelen = sizeof(sa),
	    .msg_iov = &iov,
	    .msg_iovlen = 1,
	  };
	  int x = recvmsg(nl->fd, &m, 0);
	  if (x < 0)
	    die("nl_get_reply: %m");
	  if (sa.nl_pid)		/* It isn't from the kernel */
	    {
	      DBG("Non-kernel packet\n");
	      continue;
	    }
	  nl->last_size = x;
	  nl->last_hdr = (void *) nl->rx_buffer;
	  if (m.msg_flags & MSG_TRUNC)
	    bug("nl_get_reply: got truncated reply which should be impossible");
	}
      if (NLMSG_OK(nl->last_hdr, nl->last_size))
	{
	  /* Consume one message; NLMSG_NEXT also decreases nl->last_size */
	  struct nlmsghdr *h = nl->last_hdr;
	  nl->last_hdr = NLMSG_NEXT(h, nl->last_size);
	  if (h->nlmsg_seq != nl->seq)
	    {
	      log(L_WARN "nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)",
		  h->nlmsg_seq, nl->seq);
	      continue;
	    }
	  return h;
	}
      /* No complete message left; report stray bytes and force a new read */
      if (nl->last_size)
	log(L_WARN "nl_get_reply: Found packet remnant of size %d", nl->last_size);
      nl->last_hdr = NULL;
    }
}
238
239 static struct tbf rl_netlink_err = TBF_DEFAULT_LOG_LIMITS;
240
241 static int
242 nl_error(struct nlmsghdr *h, int ignore_esrch)
243 {
244 struct nlmsgerr *e;
245 int ec;
246
247 if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr)))
248 {
249 log(L_WARN "Netlink: Truncated error message received");
250 return ENOBUFS;
251 }
252 e = (struct nlmsgerr *) NLMSG_DATA(h);
253 ec = -e->error;
254 if (ec && !(ignore_esrch && (ec == ESRCH)))
255 log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec));
256 return ec;
257 }
258
259 static struct nlmsghdr *
260 nl_get_scan(void)
261 {
262 struct nlmsghdr *h = nl_get_reply(&nl_scan);
263
264 if (h->nlmsg_type == NLMSG_DONE)
265 return NULL;
266 if (h->nlmsg_type == NLMSG_ERROR)
267 {
268 nl_error(h, 0);
269 return NULL;
270 }
271 return h;
272 }
273
274 static int
275 nl_exchange(struct nlmsghdr *pkt, int ignore_esrch)
276 {
277 struct nlmsghdr *h;
278
279 nl_send(&nl_req, pkt);
280 for(;;)
281 {
282 h = nl_get_reply(&nl_req);
283 if (h->nlmsg_type == NLMSG_ERROR)
284 break;
285 log(L_WARN "nl_exchange: Unexpected reply received");
286 }
287 return nl_error(h, ignore_esrch) ? -1 : 0;
288 }
289
290 /*
291 * Netlink attributes
292 */
293
294 static int nl_attr_len;
295
296 static void *
297 nl_checkin(struct nlmsghdr *h, int lsize)
298 {
299 nl_attr_len = h->nlmsg_len - NLMSG_LENGTH(lsize);
300 if (nl_attr_len < 0)
301 {
302 log(L_ERR "nl_checkin: underrun by %d bytes", -nl_attr_len);
303 return NULL;
304 }
305 return NLMSG_DATA(h);
306 }
307
/* Per-attribute-type entry of the tables consulted by nl_parse_attrs() */
struct nl_want_attrs {
  u8 defined:1;		/* Attribute is of interest; other attributes are skipped */
  u8 checksize:1;	/* Payload length must be exactly @size */
  u8 size;		/* Expected payload length in bytes (used when checksize is set) */
};
313
314
/* Number of slots needed to index link attributes up to IFLA_WIRELESS */
#define BIRD_IFLA_MAX (IFLA_WIRELESS+1)

/* Link attributes accepted by nl_parse_link(); entries are { defined, checksize, size } */
static struct nl_want_attrs ifla_attr_want[BIRD_IFLA_MAX] = {
  [IFLA_IFNAME]	  = { 1, 0, 0 },
  [IFLA_MTU]	  = { 1, 1, sizeof(u32) },
  [IFLA_MASTER]	  = { 1, 1, sizeof(u32) },
  [IFLA_WIRELESS] = { 1, 0, 0 },
};
323
324
/* Number of slots needed to index address attributes up to IFA_FLAGS */
#define BIRD_IFA_MAX (IFA_FLAGS+1)

/* Address attributes accepted for IPv4 addresses; entries are { defined, checksize, size } */
static struct nl_want_attrs ifa_attr_want4[BIRD_IFA_MAX] = {
  [IFA_ADDRESS]	  = { 1, 1, sizeof(ip4_addr) },
  [IFA_LOCAL]	  = { 1, 1, sizeof(ip4_addr) },
  [IFA_BROADCAST] = { 1, 1, sizeof(ip4_addr) },
  [IFA_FLAGS]	  = { 1, 1, sizeof(u32) },
};

/* Address attributes accepted for IPv6 addresses (no broadcast address here) */
static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = {
  [IFA_ADDRESS]	  = { 1, 1, sizeof(ip6_addr) },
  [IFA_LOCAL]	  = { 1, 1, sizeof(ip6_addr) },
  [IFA_FLAGS]	  = { 1, 1, sizeof(u32) },
};
339
340
/* Number of slots needed to index route attributes up to RTA_ENCAP */
#define BIRD_RTA_MAX (RTA_ENCAP+1)

/*
 * Tables of accepted route attributes; entries are { defined, checksize, size }.
 * The nexthop_* tables are used for attributes nested in one multipath next hop.
 */
static struct nl_want_attrs nexthop_attr_want4[BIRD_RTA_MAX] = {
  [RTA_GATEWAY]	  = { 1, 1, sizeof(ip4_addr) },
  [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
  [RTA_ENCAP]	  = { 1, 0, 0 },
};

static struct nl_want_attrs nexthop_attr_want6[BIRD_RTA_MAX] = {
  [RTA_GATEWAY]	  = { 1, 1, sizeof(ip6_addr) },
  [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
  [RTA_ENCAP]	  = { 1, 0, 0 },
};

#ifdef HAVE_MPLS_KERNEL
/* Attributes accepted inside an RTA_ENCAP attribute carrying MPLS encapsulation */
static struct nl_want_attrs encap_mpls_want[BIRD_RTA_MAX] = {
  [RTA_DST]	  = { 1, 0, 0 },
};
#endif

/* Top-level attributes accepted for IPv4 routes */
static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = {
  [RTA_DST]	  = { 1, 1, sizeof(ip4_addr) },
  [RTA_OIF]	  = { 1, 1, sizeof(u32) },
  [RTA_GATEWAY]	  = { 1, 1, sizeof(ip4_addr) },
  [RTA_PRIORITY]  = { 1, 1, sizeof(u32) },
  [RTA_PREFSRC]	  = { 1, 1, sizeof(ip4_addr) },
  [RTA_METRICS]	  = { 1, 0, 0 },
  [RTA_MULTIPATH] = { 1, 0, 0 },
  [RTA_FLOW]	  = { 1, 1, sizeof(u32) },
  [RTA_TABLE]	  = { 1, 1, sizeof(u32) },
  [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
  [RTA_ENCAP]	  = { 1, 0, 0 },
};

/* Top-level attributes accepted for IPv6 routes */
static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = {
  [RTA_DST]	  = { 1, 1, sizeof(ip6_addr) },
  [RTA_SRC]	  = { 1, 1, sizeof(ip6_addr) },
  [RTA_IIF]	  = { 1, 1, sizeof(u32) },
  [RTA_OIF]	  = { 1, 1, sizeof(u32) },
  [RTA_GATEWAY]	  = { 1, 1, sizeof(ip6_addr) },
  [RTA_PRIORITY]  = { 1, 1, sizeof(u32) },
  [RTA_PREFSRC]	  = { 1, 1, sizeof(ip6_addr) },
  [RTA_METRICS]	  = { 1, 0, 0 },
  [RTA_MULTIPATH] = { 1, 0, 0 },
  [RTA_FLOW]	  = { 1, 1, sizeof(u32) },
  [RTA_TABLE]	  = { 1, 1, sizeof(u32) },
  [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
  [RTA_ENCAP]	  = { 1, 0, 0 },
};

#ifdef HAVE_MPLS_KERNEL
/* Top-level attributes accepted for MPLS routes */
static struct nl_want_attrs rtm_attr_want_mpls[BIRD_RTA_MAX] = {
  [RTA_DST]	  = { 1, 1, sizeof(u32) },
  [RTA_IIF]	  = { 1, 1, sizeof(u32) },
  [RTA_OIF]	  = { 1, 1, sizeof(u32) },
  [RTA_PRIORITY]  = { 1, 1, sizeof(u32) },
  [RTA_METRICS]	  = { 1, 0, 0 },
  [RTA_FLOW]	  = { 1, 1, sizeof(u32) },
  [RTA_TABLE]	  = { 1, 1, sizeof(u32) },
  [RTA_VIA]	  = { 1, 0, 0 },
  [RTA_NEWDST]	  = { 1, 0, 0 },
};
#endif
404
405
406 static int
407 nl_parse_attrs(struct rtattr *a, struct nl_want_attrs *want, struct rtattr **k, int ksize)
408 {
409 int max = ksize / sizeof(struct rtattr *);
410 bzero(k, ksize);
411
412 for ( ; RTA_OK(a, nl_attr_len); a = RTA_NEXT(a, nl_attr_len))
413 {
414 if ((a->rta_type >= max) || !want[a->rta_type].defined)
415 continue;
416
417 if (want[a->rta_type].checksize && (RTA_PAYLOAD(a) != want[a->rta_type].size))
418 {
419 log(L_ERR "nl_parse_attrs: Malformed attribute received");
420 return 0;
421 }
422
423 k[a->rta_type] = a;
424 }
425
426 if (nl_attr_len)
427 {
428 log(L_ERR "nl_parse_attrs: remnant of size %d", nl_attr_len);
429 return 0;
430 }
431
432 return 1;
433 }
434
435 static inline u16 rta_get_u16(struct rtattr *a)
436 { return *(u16 *) RTA_DATA(a); }
437
438 static inline u32 rta_get_u32(struct rtattr *a)
439 { return *(u32 *) RTA_DATA(a); }
440
441 static inline ip4_addr rta_get_ip4(struct rtattr *a)
442 { return ip4_ntoh(*(ip4_addr *) RTA_DATA(a)); }
443
444 static inline ip6_addr rta_get_ip6(struct rtattr *a)
445 { return ip6_ntoh(*(ip6_addr *) RTA_DATA(a)); }
446
447 static inline ip_addr rta_get_ipa(struct rtattr *a)
448 {
449 if (RTA_PAYLOAD(a) == sizeof(ip4_addr))
450 return ipa_from_ip4(rta_get_ip4(a));
451 else
452 return ipa_from_ip6(rta_get_ip6(a));
453 }
454
#ifdef HAVE_MPLS_KERNEL
/*
 * Read the gateway address stored in an RTA_VIA attribute.
 * Returns IPA_NONE for an address family other than AF_INET and AF_INET6.
 */
static inline ip_addr
rta_get_via(struct rtattr *a)
{
  struct rtvia *via = RTA_DATA(a);

  if (via->rtvia_family == AF_INET)
    return ipa_from_ip4(ip4_ntoh(*(ip4_addr *) via->rtvia_addr));

  if (via->rtvia_family == AF_INET6)
    return ipa_from_ip6(ip6_ntoh(*(ip6_addr *) via->rtvia_addr));

  return IPA_NONE;
}

static u32 rta_mpls_stack[MPLS_MAX_LABEL_STACK];

/*
 * Decode the MPLS label stack carried by attribute @a into @stack using
 * mpls_get() and return its result. A payload length that is not a multiple
 * of four is logged and the trailing bytes are ignored.
 */
static inline int
rta_get_mpls(struct rtattr *a, u32 *stack)
{
  int plen = RTA_PAYLOAD(a);

  if (plen % 4)
    log(L_WARN "KRT: Strange length of received MPLS stack: %u", plen);

  return mpls_get(RTA_DATA(a), plen & ~0x3, stack);
}
#endif
475
476 struct rtattr *
477 nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint dlen)
478 {
479 uint pos = NLMSG_ALIGN(h->nlmsg_len);
480 uint len = RTA_LENGTH(dlen);
481
482 if (pos + len > bufsize)
483 bug("nl_add_attr: packet buffer overflow");
484
485 struct rtattr *a = (struct rtattr *)((char *)h + pos);
486 a->rta_type = code;
487 a->rta_len = len;
488 h->nlmsg_len = pos + len;
489
490 if (dlen > 0)
491 memcpy(RTA_DATA(a), data, dlen);
492
493 return a;
494 }
495
496 static inline struct rtattr *
497 nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code)
498 {
499 return nl_add_attr(h, bufsize, code, NULL, 0);
500 }
501
502 static inline void
503 nl_close_attr(struct nlmsghdr *h, struct rtattr *a)
504 {
505 a->rta_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)a;
506 }
507
508 static inline void
509 nl_add_attr_u16(struct nlmsghdr *h, uint bufsize, int code, u16 data)
510 {
511 nl_add_attr(h, bufsize, code, &data, 2);
512 }
513
514 static inline void
515 nl_add_attr_u32(struct nlmsghdr *h, uint bufsize, int code, u32 data)
516 {
517 nl_add_attr(h, bufsize, code, &data, 4);
518 }
519
520 static inline void
521 nl_add_attr_ip4(struct nlmsghdr *h, uint bufsize, int code, ip4_addr ip4)
522 {
523 ip4 = ip4_hton(ip4);
524 nl_add_attr(h, bufsize, code, &ip4, sizeof(ip4));
525 }
526
527 static inline void
528 nl_add_attr_ip6(struct nlmsghdr *h, uint bufsize, int code, ip6_addr ip6)
529 {
530 ip6 = ip6_hton(ip6);
531 nl_add_attr(h, bufsize, code, &ip6, sizeof(ip6));
532 }
533
534 static inline void
535 nl_add_attr_ipa(struct nlmsghdr *h, uint bufsize, int code, ip_addr ipa)
536 {
537 if (ipa_is_ip4(ipa))
538 nl_add_attr_ip4(h, bufsize, code, ipa_to_ip4(ipa));
539 else
540 nl_add_attr_ip6(h, bufsize, code, ipa_to_ip6(ipa));
541 }
542
#ifdef HAVE_MPLS_KERNEL
/* Append attribute @code holding @len MPLS labels from @stack, encoded by mpls_put() */
static inline void
nl_add_attr_mpls(struct nlmsghdr *h, uint bufsize, int code, int len, u32 *stack)
{
  /* Every label takes four bytes in the encoded form */
  char encoded[len*4];

  mpls_put(encoded, len, stack);
  nl_add_attr(h, bufsize, code, encoded, len*4);
}

/*
 * Append MPLS encapsulation of a next hop: RTA_ENCAP_TYPE set to
 * LWTUNNEL_ENCAP_MPLS followed by RTA_ENCAP with the label stack nested
 * inside as RTA_DST.
 */
static inline void
nl_add_attr_mpls_encap(struct nlmsghdr *h, uint bufsize, int len, u32 *stack)
{
  struct rtattr *encap;

  nl_add_attr_u16(h, bufsize, RTA_ENCAP_TYPE, LWTUNNEL_ENCAP_MPLS);

  encap = nl_open_attr(h, bufsize, RTA_ENCAP);
  nl_add_attr_mpls(h, bufsize, RTA_DST, len, stack);
  nl_close_attr(h, encap);
}

/* Append an RTA_VIA attribute holding gateway @ipa together with its address family */
static inline void
nl_add_attr_via(struct nlmsghdr *h, uint bufsize, ip_addr ipa)
{
  /* Room for the largest (IPv6) address; IPv4 uses only the first four bytes */
  struct rtvia *via = alloca(sizeof(struct rtvia) + 16);
  uint addr_len;

  if (ipa_is_ip4(ipa))
  {
    via->rtvia_family = AF_INET;
    put_ip4(via->rtvia_addr, ipa_to_ip4(ipa));
    addr_len = 4;
  }
  else
  {
    via->rtvia_family = AF_INET6;
    put_ip6(via->rtvia_addr, ipa_to_ip6(ipa));
    addr_len = 16;
  }

  nl_add_attr(h, bufsize, RTA_VIA, via, sizeof(struct rtvia) + addr_len);
}
#endif
581
582 static inline struct rtnexthop *
583 nl_open_nexthop(struct nlmsghdr *h, uint bufsize)
584 {
585 uint pos = NLMSG_ALIGN(h->nlmsg_len);
586 uint len = RTNH_LENGTH(0);
587
588 if (pos + len > bufsize)
589 bug("nl_open_nexthop: packet buffer overflow");
590
591 h->nlmsg_len = pos + len;
592
593 return (void *)h + pos;
594 }
595
596 static inline void
597 nl_close_nexthop(struct nlmsghdr *h, struct rtnexthop *nh)
598 {
599 nh->rtnh_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)nh;
600 }
601
602 static inline void
603 nl_add_nexthop(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af UNUSED)
604 {
605 #ifdef HAVE_MPLS_KERNEL
606 if (nh->labels > 0)
607 if (af == AF_MPLS)
608 nl_add_attr_mpls(h, bufsize, RTA_NEWDST, nh->labels, nh->label);
609 else
610 nl_add_attr_mpls_encap(h, bufsize, nh->labels, nh->label);
611
612 if (ipa_nonzero(nh->gw))
613 if (af == AF_MPLS)
614 nl_add_attr_via(h, bufsize, nh->gw);
615 else
616 nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
617 #else
618
619 if (ipa_nonzero(nh->gw))
620 nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
621 #endif
622 }
623
624 static void
625 nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af)
626 {
627 struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH);
628
629 for (; nh; nh = nh->next)
630 {
631 struct rtnexthop *rtnh = nl_open_nexthop(h, bufsize);
632
633 rtnh->rtnh_flags = 0;
634 rtnh->rtnh_hops = nh->weight;
635 rtnh->rtnh_ifindex = nh->iface->index;
636
637 nl_add_nexthop(h, bufsize, nh, af);
638
639 if (nh->flags & RNF_ONLINK)
640 rtnh->rtnh_flags |= RTNH_F_ONLINK;
641
642 nl_close_nexthop(h, rtnh);
643 }
644
645 nl_close_attr(h, a);
646 }
647
648 static struct nexthop *
649 nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr *ra, int af)
650 {
651 struct rtattr *a[BIRD_RTA_MAX];
652 struct rtnexthop *nh = RTA_DATA(ra);
653 struct nexthop *rv, *first, **last;
654 unsigned len = RTA_PAYLOAD(ra);
655
656 first = NULL;
657 last = &first;
658
659 while (len)
660 {
661 /* Use RTNH_OK(nh,len) ?? */
662 if ((len < sizeof(*nh)) || (len < nh->rtnh_len))
663 return NULL;
664
665 *last = rv = lp_allocz(s->pool, NEXTHOP_MAX_SIZE);
666 last = &(rv->next);
667
668 rv->weight = nh->rtnh_hops;
669 rv->iface = if_find_by_index(nh->rtnh_ifindex);
670 if (!rv->iface)
671 return NULL;
672
673 /* Nonexistent RTNH_PAYLOAD ?? */
674 nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0);
675 switch (af)
676 {
677 case AF_INET:
678 if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want4, a, sizeof(a)))
679 return NULL;
680 break;
681
682 case AF_INET6:
683 if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want6, a, sizeof(a)))
684 return NULL;
685 break;
686
687 default:
688 return NULL;
689 }
690
691 if (a[RTA_GATEWAY])
692 {
693 rv->gw = rta_get_ipa(a[RTA_GATEWAY]);
694
695 if (nh->rtnh_flags & RTNH_F_ONLINK)
696 rv->flags |= RNF_ONLINK;
697
698 neighbor *nbr;
699 nbr = neigh_find(&p->p, rv->gw, rv->iface,
700 (rv->flags & RNF_ONLINK) ? NEF_ONLINK : 0);
701 if (!nbr || (nbr->scope == SCOPE_HOST))
702 return NULL;
703 }
704 else
705 rv->gw = IPA_NONE;
706
707 #ifdef HAVE_MPLS_KERNEL
708 if (a[RTA_ENCAP_TYPE])
709 {
710 if (rta_get_u16(a[RTA_ENCAP_TYPE]) != LWTUNNEL_ENCAP_MPLS) {
711 log(L_WARN "KRT: Unknown encapsulation method %d in multipath", rta_get_u16(a[RTA_ENCAP_TYPE]));
712 return NULL;
713 }
714
715 struct rtattr *enca[BIRD_RTA_MAX];
716 nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
717 nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
718 rv->labels = rta_get_mpls(enca[RTA_DST], rv->label);
719 break;
720 }
721 #endif
722
723
724 len -= NLMSG_ALIGN(nh->rtnh_len);
725 nh = RTNH_NEXT(nh);
726 }
727
728 return first;
729 }
730
731 static void
732 nl_add_metrics(struct nlmsghdr *h, uint bufsize, u32 *metrics, int max)
733 {
734 struct rtattr *a = nl_open_attr(h, bufsize, RTA_METRICS);
735 int t;
736
737 for (t = 1; t < max; t++)
738 if (metrics[0] & (1 << t))
739 nl_add_attr_u32(h, bufsize, t, metrics[t]);
740
741 nl_close_attr(h, a);
742 }
743
744 static int
745 nl_parse_metrics(struct rtattr *hdr, u32 *metrics, int max)
746 {
747 struct rtattr *a = RTA_DATA(hdr);
748 int len = RTA_PAYLOAD(hdr);
749
750 metrics[0] = 0;
751 for (; RTA_OK(a, len); a = RTA_NEXT(a, len))
752 {
753 if (a->rta_type == RTA_UNSPEC)
754 continue;
755
756 if (a->rta_type >= max)
757 continue;
758
759 if (RTA_PAYLOAD(a) != 4)
760 return -1;
761
762 metrics[0] |= 1 << a->rta_type;
763 metrics[a->rta_type] = rta_get_u32(a);
764 }
765
766 if (len > 0)
767 return -1;
768
769 return 0;
770 }
771
772
773 /*
774 * Scanning of interfaces
775 */
776
777 static void
778 nl_parse_link(struct nlmsghdr *h, int scan)
779 {
780 struct ifinfomsg *i;
781 struct rtattr *a[BIRD_IFLA_MAX];
782 int new = h->nlmsg_type == RTM_NEWLINK;
783 struct iface f = {};
784 struct iface *ifi;
785 char *name;
786 u32 mtu, master = 0;
787 uint fl;
788
789 if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFLA_RTA(i), ifla_attr_want, a, sizeof(a)))
790 return;
791 if (!a[IFLA_IFNAME] || (RTA_PAYLOAD(a[IFLA_IFNAME]) < 2) || !a[IFLA_MTU])
792 {
793 /*
794 * IFLA_IFNAME and IFLA_MTU are required, in fact, but there may also come
795 * a message with IFLA_WIRELESS set, where (e.g.) no IFLA_IFNAME exists.
796 * We simply ignore all such messages with IFLA_WIRELESS without notice.
797 */
798
799 if (a[IFLA_WIRELESS])
800 return;
801
802 log(L_ERR "KIF: Malformed message received");
803 return;
804 }
805
806 name = RTA_DATA(a[IFLA_IFNAME]);
807 mtu = rta_get_u32(a[IFLA_MTU]);
808
809 if (a[IFLA_MASTER])
810 master = rta_get_u32(a[IFLA_MASTER]);
811
812 ifi = if_find_by_index(i->ifi_index);
813 if (!new)
814 {
815 DBG("KIF: IF%d(%s) goes down\n", i->ifi_index, name);
816 if (!ifi)
817 return;
818
819 if_delete(ifi);
820 }
821 else
822 {
823 DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i->ifi_index, name, mtu, i->ifi_flags);
824 if (ifi && strncmp(ifi->name, name, sizeof(ifi->name)-1))
825 if_delete(ifi);
826
827 strncpy(f.name, name, sizeof(f.name)-1);
828 f.index = i->ifi_index;
829 f.mtu = mtu;
830
831 f.master_index = master;
832 f.master = if_find_by_index(master);
833
834 fl = i->ifi_flags;
835 if (fl & IFF_UP)
836 f.flags |= IF_ADMIN_UP;
837 if (fl & IFF_LOWER_UP)
838 f.flags |= IF_LINK_UP;
839 if (fl & IFF_LOOPBACK) /* Loopback */
840 f.flags |= IF_MULTIACCESS | IF_LOOPBACK | IF_IGNORE;
841 else if (fl & IFF_POINTOPOINT) /* PtP */
842 f.flags |= IF_MULTICAST;
843 else if (fl & IFF_BROADCAST) /* Broadcast */
844 f.flags |= IF_MULTIACCESS | IF_BROADCAST | IF_MULTICAST;
845 else
846 f.flags |= IF_MULTIACCESS; /* NBMA */
847
848 if (fl & IFF_MULTICAST)
849 f.flags |= IF_MULTICAST;
850
851 ifi = if_update(&f);
852
853 if (!scan)
854 if_end_partial_update(ifi);
855 }
856 }
857
/*
 * Process one IPv4 address message with payload @i (attributes follow it and
 * nl_attr_len is expected to be already set by nl_checkin()).
 * @new is nonzero for an added address, zero for a removed one; @scan is
 * nonzero during a full scan, otherwise the partial interface update is
 * finished at the end. Messages without IFA_LOCAL or IFA_ADDRESS, for an
 * unknown interface, or with an address rejected by ipa_classify() are
 * logged and dropped.
 */
static void
nl_parse_addr4(struct ifaddrmsg *i, int scan, int new)
{
  struct rtattr *a[BIRD_IFA_MAX];
  struct iface *ifi;
  u32 ifa_flags;
  int scope;

  if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a)))
    return;

  if (!a[IFA_LOCAL])
    {
      log(L_ERR "KIF: Malformed message received (missing IFA_LOCAL)");
      return;
    }
  if (!a[IFA_ADDRESS])
    {
      log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
      return;
    }

  ifi = if_find_by_index(i->ifa_index);
  if (!ifi)
    {
      log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
      return;
    }

  /* The IFA_FLAGS attribute, when present, takes precedence over the header flags */
  if (a[IFA_FLAGS])
    ifa_flags = rta_get_u32(a[IFA_FLAGS]);
  else
    ifa_flags = i->ifa_flags;

  struct ifa ifa;
  bzero(&ifa, sizeof(ifa));
  ifa.iface = ifi;
  if (ifa_flags & IFA_F_SECONDARY)
    ifa.flags |= IA_SECONDARY;

  ifa.ip = rta_get_ipa(a[IFA_LOCAL]);

  /* An invalid prefix length turns the message into an address removal */
  if (i->ifa_prefixlen > IP4_MAX_PREFIX_LENGTH)
    {
      log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
      new = 0;
    }
  if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH)
    {
      ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
      net_fill_ip4(&ifa.prefix, rta_get_ip4(a[IFA_ADDRESS]), i->ifa_prefixlen);

      /* It is either a host address or a peer address */
      if (ipa_equal(ifa.ip, ifa.brd))
	ifa.flags |= IA_HOST;
      else
	{
	  ifa.flags |= IA_PEER;
	  ifa.opposite = ifa.brd;
	}
    }
  else
    {
      net_fill_ip4(&ifa.prefix, ipa_to_ip4(ifa.ip), i->ifa_prefixlen);
      net_normalize(&ifa.prefix);

      /* Two shortest-but-one prefix lengths have a well-defined opposite address */
      if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 1)
	ifa.opposite = ipa_opposite_m1(ifa.ip);

      if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 2)
	ifa.opposite = ipa_opposite_m2(ifa.ip);

      if ((ifi->flags & IF_BROADCAST) && a[IFA_BROADCAST])
	{
	  /* xbrd is the broadcast reported by the kernel, ybrd the one computed from the prefix */
	  ip4_addr xbrd = rta_get_ip4(a[IFA_BROADCAST]);
	  ip4_addr ybrd = ip4_or(ipa_to_ip4(ifa.ip), ip4_not(ip4_mkmask(i->ifa_prefixlen)));

	  if (ip4_equal(xbrd, net4_prefix(&ifa.prefix)) || ip4_equal(xbrd, ybrd))
	    ifa.brd = ipa_from_ip4(xbrd);
	  else if (ifi->flags & IF_TMP_DOWN) /* Complain only during the first scan */
	    {
	      log(L_ERR "KIF: Invalid broadcast address %I4 for %s", xbrd, ifi->name);
	      ifa.brd = ipa_from_ip4(ybrd);
	    }
	}
    }

  scope = ipa_classify(ifa.ip);
  if (scope < 0)
    {
      log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
      return;
    }
  ifa.scope = scope & IADDR_SCOPE_MASK;

  DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
      ifi->index, ifi->name,
      new ? "added" : "removed",
      ifa.ip, ifa.flags, ifa.prefix, ifa.brd, ifa.opposite);

  if (new)
    ifa_update(&ifa);
  else
    ifa_delete(&ifa);

  if (!scan)
    if_end_partial_update(ifi);
}
966
/*
 * Process one IPv6 address message with payload @i (attributes follow it and
 * nl_attr_len is expected to be already set by nl_checkin()).
 * @new is nonzero for an added address, zero for a removed one; @scan is
 * nonzero during a full scan, otherwise the partial interface update is
 * finished at the end. Tentative addresses are ignored silently; messages
 * without IFA_ADDRESS, for an unknown interface, or with an address rejected
 * by ipa_classify() are logged and dropped.
 */
static void
nl_parse_addr6(struct ifaddrmsg *i, int scan, int new)
{
  struct rtattr *a[BIRD_IFA_MAX];
  struct iface *ifi;
  u32 ifa_flags;
  int scope;

  if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a)))
    return;

  if (!a[IFA_ADDRESS])
    {
      log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
      return;
    }

  ifi = if_find_by_index(i->ifa_index);
  if (!ifi)
    {
      log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
      return;
    }

  /* The IFA_FLAGS attribute, when present, takes precedence over the header flags */
  if (a[IFA_FLAGS])
    ifa_flags = rta_get_u32(a[IFA_FLAGS]);
  else
    ifa_flags = i->ifa_flags;

  struct ifa ifa;
  bzero(&ifa, sizeof(ifa));
  ifa.iface = ifi;
  if (ifa_flags & IFA_F_SECONDARY)
    ifa.flags |= IA_SECONDARY;

  /* Ignore tentative addresses silently */
  if (ifa_flags & IFA_F_TENTATIVE)
    return;

  /* IFA_LOCAL can be unset for IPv6 interfaces */
  ifa.ip = rta_get_ipa(a[IFA_LOCAL] ? : a[IFA_ADDRESS]);

  /* An invalid prefix length turns the message into an address removal */
  if (i->ifa_prefixlen > IP6_MAX_PREFIX_LENGTH)
    {
      log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
      new = 0;
    }
  if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH)
    {
      ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
      net_fill_ip6(&ifa.prefix, rta_get_ip6(a[IFA_ADDRESS]), i->ifa_prefixlen);

      /* It is either a host address or a peer address */
      if (ipa_equal(ifa.ip, ifa.brd))
	ifa.flags |= IA_HOST;
      else
	{
	  ifa.flags |= IA_PEER;
	  ifa.opposite = ifa.brd;
	}
    }
  else
    {
      net_fill_ip6(&ifa.prefix, ipa_to_ip6(ifa.ip), i->ifa_prefixlen);
      net_normalize(&ifa.prefix);

      if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH - 1)
	ifa.opposite = ipa_opposite_m1(ifa.ip);
    }

  scope = ipa_classify(ifa.ip);
  if (scope < 0)
    {
      log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
      return;
    }
  ifa.scope = scope & IADDR_SCOPE_MASK;

  DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
      ifi->index, ifi->name,
      new ? "added" : "removed",
      ifa.ip, ifa.flags, ifa.prefix, ifa.brd, ifa.opposite);

  if (new)
    ifa_update(&ifa);
  else
    ifa_delete(&ifa);

  if (!scan)
    if_end_partial_update(ifi);
}
1058
/*
 * Process one RTM_NEWADDR / RTM_DELADDR message @h by handing it over to the
 * parser for its address family. Messages that are too short or belong to
 * another address family are ignored.
 */
static void
nl_parse_addr(struct nlmsghdr *h, int scan)
{
  struct ifaddrmsg *i = nl_checkin(h, sizeof(*i));

  if (!i)
    return;

  int new = (h->nlmsg_type == RTM_NEWADDR);

  if (i->ifa_family == AF_INET)
    nl_parse_addr4(i, scan, new);
  else if (i->ifa_family == AF_INET6)
    nl_parse_addr6(i, scan, new);
}
1078
1079 void
1080 kif_do_scan(struct kif_proto *p UNUSED)
1081 {
1082 struct nlmsghdr *h;
1083
1084 if_start_update();
1085
1086 nl_request_dump(AF_UNSPEC, RTM_GETLINK);
1087 while (h = nl_get_scan())
1088 if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)
1089 nl_parse_link(h, 1);
1090 else
1091 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
1092
1093 /* Re-resolve master interface for slaves */
1094 struct iface *i;
1095 WALK_LIST(i, iface_list)
1096 if (i->master_index)
1097 {
1098 struct iface f = {
1099 .flags = i->flags,
1100 .mtu = i->mtu,
1101 .index = i->index,
1102 .master_index = i->master_index,
1103 .master = if_find_by_index(i->master_index)
1104 };
1105
1106 if (f.master != i->master)
1107 {
1108 memcpy(f.name, i->name, sizeof(f.name));
1109 if_update(&f);
1110 }
1111 }
1112
1113 nl_request_dump(AF_INET, RTM_GETADDR);
1114 while (h = nl_get_scan())
1115 if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
1116 nl_parse_addr(h, 1);
1117 else
1118 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
1119
1120 nl_request_dump(AF_INET6, RTM_GETADDR);
1121 while (h = nl_get_scan())
1122 if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
1123 nl_parse_addr(h, 1);
1124 else
1125 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
1126
1127 if_end_update();
1128 }
1129
1130 /*
1131 * Routes
1132 */
1133
/*
 * Kernel table ID taken from the configuration of protocol @p.
 * NOTE(review): KRT_CF is defined elsewhere; presumably it refers to the
 * local variable named p -- confirm.
 */
static inline u32
krt_table_id(struct krt_proto *p)
{
  return KRT_CF->sys.table_id;
}

/* Hash table of krt protocol instances, keyed by address family and table ID */
static HASH(struct krt_proto) nl_table_map;

/* Parameters of the hash table above, in the form expected by the HASH_* macros */
#define RTH_KEY(p)		p->af, krt_table_id(p)
#define RTH_NEXT(p)		p->sys.hash_next
#define RTH_EQ(a1,i1,a2,i2)	a1 == a2 && i1 == i2
#define RTH_FN(a,i)		a ^ u32_hash(i)

#define RTH_REHASH		rth_rehash
/* NOTE(review): rehash tuning parameters; their meaning is defined by lib/hash.h -- confirm */
#define RTH_PARAMS		/8, *2, 2, 2, 6, 20

HASH_DEFINE_REHASH_FN(RTH, struct krt_proto)
1151
1152 int
1153 krt_capable(rte *e)
1154 {
1155 rta *a = e->attrs;
1156
1157 switch (a->dest)
1158 {
1159 case RTD_UNICAST:
1160 case RTD_BLACKHOLE:
1161 case RTD_UNREACHABLE:
1162 case RTD_PROHIBIT:
1163 return 1;
1164
1165 default:
1166 return 0;
1167 }
1168 }
1169
1170 static inline int
1171 nh_bufsize(struct nexthop *nh)
1172 {
1173 int rv = 0;
1174 for (; nh != NULL; nh = nh->next)
1175 rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr)));
1176 return rv;
1177 }
1178
1179 static int
1180 nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh)
1181 {
1182 eattr *ea;
1183 net *net = e->net;
1184 rta *a = e->attrs;
1185 ea_list *eattrs = a->eattrs;
1186 int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(&(a->nh));
1187 u32 priority = 0;
1188
1189 struct {
1190 struct nlmsghdr h;
1191 struct rtmsg r;
1192 char buf[0];
1193 } *r;
1194
1195 int rsize = sizeof(*r) + bufsize;
1196 r = alloca(rsize);
1197
1198 DBG("nl_send_route(%N,op=%x)\n", net->n.addr, op);
1199
1200 bzero(&r->h, sizeof(r->h));
1201 bzero(&r->r, sizeof(r->r));
1202 r->h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE;
1203 r->h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1204 r->h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK;
1205
1206 r->r.rtm_family = p->af;
1207 r->r.rtm_dst_len = net_pxlen(net->n.addr);
1208 r->r.rtm_protocol = RTPROT_BIRD;
1209 r->r.rtm_scope = RT_SCOPE_NOWHERE;
1210 #ifdef HAVE_MPLS_KERNEL
1211 if (p->af == AF_MPLS)
1212 {
1213 /*
1214 * Kernel MPLS code is a bit picky. We must:
1215 * 1) Always set RT_SCOPE_UNIVERSE and RTN_UNICAST (even for RTM_DELROUTE)
1216 * 2) Never use RTA_PRIORITY
1217 */
1218
1219 u32 label = net_mpls(net->n.addr);
1220 nl_add_attr_mpls(&r->h, rsize, RTA_DST, 1, &label);
1221 r->r.rtm_scope = RT_SCOPE_UNIVERSE;
1222 r->r.rtm_type = RTN_UNICAST;
1223 }
1224 else
1225 #endif
1226 {
1227 nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(net->n.addr));
1228
1229 /* Add source address for IPv6 SADR routes */
1230 if (net->n.addr->type == NET_IP6_SADR)
1231 {
1232 net_addr_ip6_sadr *a = (void *) &net->n.addr;
1233 nl_add_attr_ip6(&r->h, rsize, RTA_SRC, a->src_prefix);
1234 r->r.rtm_src_len = a->src_pxlen;
1235 }
1236 }
1237
1238 /*
1239 * Strange behavior for RTM_DELROUTE:
1240 * 1) rtm_family is ignored in IPv6, works for IPv4
1241 * 2) not setting RTA_PRIORITY is different from setting default value (on IPv6)
1242 * 3) not setting RTA_PRIORITY is equivalent to setting 0, which is wildcard
1243 */
1244
1245 if (krt_table_id(p) < 256)
1246 r->r.rtm_table = krt_table_id(p);
1247 else
1248 nl_add_attr_u32(&r->h, rsize, RTA_TABLE, krt_table_id(p));
1249
1250 if (p->af == AF_MPLS)
1251 priority = 0;
1252 else if (a->source == RTS_DUMMY)
1253 priority = e->u.krt.metric;
1254 else if (KRT_CF->sys.metric)
1255 priority = KRT_CF->sys.metric;
1256 else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC)))
1257 priority = ea->u.data;
1258
1259 if (priority)
1260 nl_add_attr_u32(&r->h, rsize, RTA_PRIORITY, priority);
1261
1262 /* For route delete, we do not specify remaining route attributes */
1263 if (op == NL_OP_DELETE)
1264 goto dest;
1265
1266 /* Default scope is LINK for device routes, UNIVERSE otherwise */
1267 if (p->af == AF_MPLS)
1268 r->r.rtm_scope = RT_SCOPE_UNIVERSE;
1269 else if (ea = ea_find(eattrs, EA_KRT_SCOPE))
1270 r->r.rtm_scope = ea->u.data;
1271 else
1272 r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
1273
1274 if (ea = ea_find(eattrs, EA_KRT_PREFSRC))
1275 nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data);
1276
1277 if (ea = ea_find(eattrs, EA_KRT_REALM))
1278 nl_add_attr_u32(&r->h, rsize, RTA_FLOW, ea->u.data);
1279
1280
1281 u32 metrics[KRT_METRICS_MAX];
1282 metrics[0] = 0;
1283
1284 struct ea_walk_state ews = { .eattrs = eattrs };
1285 while (ea = ea_walk(&ews, EA_KRT_METRICS, KRT_METRICS_MAX))
1286 {
1287 int id = ea->id - EA_KRT_METRICS;
1288 metrics[0] |= 1 << id;
1289 metrics[id] = ea->u.data;
1290 }
1291
1292 if (metrics[0])
1293 nl_add_metrics(&r->h, rsize, metrics, KRT_METRICS_MAX);
1294
1295
1296 dest:
1297 switch (dest)
1298 {
1299 case RTD_UNICAST:
1300 r->r.rtm_type = RTN_UNICAST;
1301 if (nh->next && !krt_ecmp6(p))
1302 nl_add_multipath(&r->h, rsize, nh, p->af);
1303 else
1304 {
1305 nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->iface->index);
1306 nl_add_nexthop(&r->h, rsize, nh, p->af);
1307
1308 if (nh->flags & RNF_ONLINK)
1309 r->r.rtm_flags |= RTNH_F_ONLINK;
1310 }
1311 break;
1312 case RTD_BLACKHOLE:
1313 r->r.rtm_type = RTN_BLACKHOLE;
1314 break;
1315 case RTD_UNREACHABLE:
1316 r->r.rtm_type = RTN_UNREACHABLE;
1317 break;
1318 case RTD_PROHIBIT:
1319 r->r.rtm_type = RTN_PROHIBIT;
1320 break;
1321 case RTD_NONE:
1322 break;
1323 default:
1324 bug("krt_capable inconsistent with nl_send_route");
1325 }
1326
1327 /* Ignore missing for DELETE */
1328 return nl_exchange(&r->h, (op == NL_OP_DELETE));
1329 }
1330
1331 static inline int
1332 nl_add_rte(struct krt_proto *p, rte *e)
1333 {
1334 rta *a = e->attrs;
1335 int err = 0;
1336
1337 if (krt_ecmp6(p) && a->nh.next)
1338 {
1339 struct nexthop *nh = &(a->nh);
1340
1341 err = nl_send_route(p, e, NL_OP_ADD, RTD_UNICAST, nh);
1342 if (err < 0)
1343 return err;
1344
1345 for (nh = nh->next; nh; nh = nh->next)
1346 err += nl_send_route(p, e, NL_OP_APPEND, RTD_UNICAST, nh);
1347
1348 return err;
1349 }
1350
1351 return nl_send_route(p, e, NL_OP_ADD, a->dest, &(a->nh));
1352 }
1353
1354 static inline int
1355 nl_delete_rte(struct krt_proto *p, rte *e)
1356 {
1357 int err = 0;
1358
1359 /* For IPv6, we just repeatedly request DELETE until we get error */
1360 do
1361 err = nl_send_route(p, e, NL_OP_DELETE, RTD_NONE, NULL);
1362 while (krt_ecmp6(p) && !err);
1363
1364 return err;
1365 }
1366
1367 void
1368 krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old)
1369 {
1370 int err = 0;
1371
1372 /*
1373 * We could use NL_OP_REPLACE, but route replace on Linux has some problems:
1374 *
1375 * 1) Does not check for matching rtm_protocol
1376 * 2) Has broken semantics for IPv6 ECMP
1377 * 3) Crashes some kernel version when used for IPv6 ECMP
1378 *
1379 * So we use NL_OP_DELETE and then NL_OP_ADD. We also do not trust the old
1380 * route value, so we do not try to optimize IPv6 ECMP reconfigurations.
1381 */
1382
1383 if (old)
1384 nl_delete_rte(p, old);
1385
1386 if (new)
1387 err = nl_add_rte(p, new);
1388
1389 if (err < 0)
1390 n->n.flags |= KRF_SYNC_ERROR;
1391 else
1392 n->n.flags &= ~KRF_SYNC_ERROR;
1393 }
1394
1395 static int
1396 nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint priority, uint krt_type)
1397 {
1398 /* Route merging must be active */
1399 if (!s->merge)
1400 return 0;
1401
1402 /* Saved and new route must have same network, proto/table, and priority */
1403 if ((s->net != net) || (s->proto != p) || (s->krt_metric != priority))
1404 return 0;
1405
1406 /* Both must be regular unicast routes */
1407 if ((s->krt_type != RTN_UNICAST) || (krt_type != RTN_UNICAST))
1408 return 0;
1409
1410 return 1;
1411 }
1412
1413 static void
1414 nl_announce_route(struct nl_parse_state *s)
1415 {
1416 rte *e = rte_get_temp(s->attrs);
1417 e->net = s->net;
1418 e->u.krt.src = s->krt_src;
1419 e->u.krt.proto = s->krt_proto;
1420 e->u.krt.seen = 0;
1421 e->u.krt.best = 0;
1422 e->u.krt.metric = s->krt_metric;
1423
1424 if (s->scan)
1425 krt_got_route(s->proto, e);
1426 else
1427 krt_got_route_async(s->proto, e, s->new);
1428
1429 s->net = NULL;
1430 s->attrs = NULL;
1431 s->proto = NULL;
1432 lp_flush(s->pool);
1433 }
1434
1435 static inline void
1436 nl_parse_begin(struct nl_parse_state *s, int scan, int merge)
1437 {
1438 memset(s, 0, sizeof (struct nl_parse_state));
1439 s->pool = nl_linpool;
1440 s->scan = scan;
1441 s->merge = merge;
1442 }
1443
1444 static inline void
1445 nl_parse_end(struct nl_parse_state *s)
1446 {
1447 if (s->net)
1448 nl_announce_route(s);
1449 }
1450
1451
/*
 * SKIP(fmt, args...) - pass a message prefixed with "KRT: Ignoring route - "
 * to DBG() and return from the enclosing function. The bare return makes it
 * usable only inside functions returning void.
 */
#define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
1453
1454 static void
1455 nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
1456 {
1457 struct krt_proto *p;
1458 struct rtmsg *i;
1459 struct rtattr *a[BIRD_RTA_MAX];
1460 int new = h->nlmsg_type == RTM_NEWROUTE;
1461
1462 net_addr dst, src = {};
1463 u32 oif = ~0;
1464 u32 table_id;
1465 u32 priority = 0;
1466 u32 def_scope = RT_SCOPE_UNIVERSE;
1467 int krt_src;
1468
1469 if (!(i = nl_checkin(h, sizeof(*i))))
1470 return;
1471
1472 switch (i->rtm_family)
1473 {
1474 case AF_INET:
1475 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want4, a, sizeof(a)))
1476 return;
1477
1478 if (a[RTA_DST])
1479 net_fill_ip4(&dst, rta_get_ip4(a[RTA_DST]), i->rtm_dst_len);
1480 else
1481 net_fill_ip4(&dst, IP4_NONE, 0);
1482 break;
1483
1484 case AF_INET6:
1485 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a)))
1486 return;
1487
1488 if (a[RTA_DST])
1489 net_fill_ip6(&dst, rta_get_ip6(a[RTA_DST]), i->rtm_dst_len);
1490 else
1491 net_fill_ip6(&dst, IP6_NONE, 0);
1492
1493 if (a[RTA_SRC])
1494 net_fill_ip6(&src, rta_get_ip6(a[RTA_SRC]), i->rtm_src_len);
1495 else
1496 net_fill_ip6(&src, IP6_NONE, 0);
1497 break;
1498
1499 #ifdef HAVE_MPLS_KERNEL
1500 case AF_MPLS:
1501 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want_mpls, a, sizeof(a)))
1502 return;
1503
1504 if (!a[RTA_DST])
1505 SKIP("MPLS route without RTA_DST");
1506
1507 if (rta_get_mpls(a[RTA_DST], rta_mpls_stack) != 1)
1508 SKIP("MPLS route with multi-label RTA_DST");
1509
1510 net_fill_mpls(&dst, rta_mpls_stack[0]);
1511 break;
1512 #endif
1513
1514 default:
1515 return;
1516 }
1517
1518 if (a[RTA_OIF])
1519 oif = rta_get_u32(a[RTA_OIF]);
1520
1521 if (a[RTA_TABLE])
1522 table_id = rta_get_u32(a[RTA_TABLE]);
1523 else
1524 table_id = i->rtm_table;
1525
1526 /* Do we know this table? */
1527 p = HASH_FIND(nl_table_map, RTH, i->rtm_family, table_id);
1528 if (!p)
1529 SKIP("unknown table %d\n", table);
1530
1531 if (a[RTA_SRC] && (p->p.net_type != NET_IP6_SADR))
1532 SKIP("src prefix for non-SADR channel\n");
1533
1534 if (a[RTA_IIF])
1535 SKIP("IIF set\n");
1536
1537 if (i->rtm_tos != 0) /* We don't support TOS */
1538 SKIP("TOS %02x\n", i->rtm_tos);
1539
1540 if (s->scan && !new)
1541 SKIP("RTM_DELROUTE in scan\n");
1542
1543 if (a[RTA_PRIORITY])
1544 priority = rta_get_u32(a[RTA_PRIORITY]);
1545
1546 int c = net_classify(&dst);
1547 if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
1548 SKIP("strange class/scope\n");
1549
1550 switch (i->rtm_protocol)
1551 {
1552 case RTPROT_UNSPEC:
1553 SKIP("proto unspec\n");
1554
1555 case RTPROT_REDIRECT:
1556 krt_src = KRT_SRC_REDIRECT;
1557 break;
1558
1559 case RTPROT_KERNEL:
1560 krt_src = KRT_SRC_KERNEL;
1561 return;
1562
1563 case RTPROT_BIRD:
1564 if (!s->scan)
1565 SKIP("echo\n");
1566 krt_src = KRT_SRC_BIRD;
1567 break;
1568
1569 case RTPROT_BOOT:
1570 default:
1571 krt_src = KRT_SRC_ALIEN;
1572 }
1573
1574 net_addr *n = &dst;
1575 if (p->p.net_type == NET_IP6_SADR)
1576 {
1577 n = alloca(sizeof(net_addr_ip6_sadr));
1578 net_fill_ip6_sadr(n, net6_prefix(&dst), net6_pxlen(&dst),
1579 net6_prefix(&src), net6_pxlen(&src));
1580 }
1581
1582 net *net = net_get(p->p.main_channel->table, n);
1583
1584 if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type))
1585 nl_announce_route(s);
1586
1587 rta *ra = lp_allocz(s->pool, RTA_MAX_SIZE);
1588 ra->src = p->p.main_source;
1589 ra->source = RTS_INHERIT;
1590 ra->scope = SCOPE_UNIVERSE;
1591
1592 switch (i->rtm_type)
1593 {
1594 case RTN_UNICAST:
1595 ra->dest = RTD_UNICAST;
1596
1597 if (a[RTA_MULTIPATH])
1598 {
1599 struct nexthop *nh = nl_parse_multipath(s, p, a[RTA_MULTIPATH], i->rtm_family);
1600 if (!nh)
1601 {
1602 log(L_ERR "KRT: Received strange multipath route %N", net->n.addr);
1603 return;
1604 }
1605
1606 ra->nh = *nh;
1607 break;
1608 }
1609
1610 ra->nh.iface = if_find_by_index(oif);
1611 if (!ra->nh.iface)
1612 {
1613 log(L_ERR "KRT: Received route %N with unknown ifindex %u", net->n.addr, oif);
1614 return;
1615 }
1616
1617 if ((i->rtm_family != AF_MPLS) && a[RTA_GATEWAY]
1618 #ifdef HAVE_MPLS_KERNEL
1619 || (i->rtm_family == AF_MPLS) && a[RTA_VIA]
1620 #endif
1621 )
1622 {
1623 #ifdef HAVE_MPLS_KERNEL
1624 if (i->rtm_family == AF_MPLS)
1625 ra->nh.gw = rta_get_via(a[RTA_VIA]);
1626 else
1627 #endif
1628 ra->nh.gw = rta_get_ipa(a[RTA_GATEWAY]);
1629
1630 /* Silently skip strange 6to4 routes */
1631 const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96);
1632 if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->nh.gw, (net_addr *) &sit))
1633 return;
1634
1635 if (i->rtm_flags & RTNH_F_ONLINK)
1636 ra->nh.flags |= RNF_ONLINK;
1637
1638 neighbor *nbr;
1639 nbr = neigh_find(&p->p, ra->nh.gw, ra->nh.iface,
1640 (ra->nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0);
1641 if (!nbr || (nbr->scope == SCOPE_HOST))
1642 {
1643 log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr,
1644 ra->nh.gw);
1645 return;
1646 }
1647 }
1648
1649 break;
1650 case RTN_BLACKHOLE:
1651 ra->dest = RTD_BLACKHOLE;
1652 break;
1653 case RTN_UNREACHABLE:
1654 ra->dest = RTD_UNREACHABLE;
1655 break;
1656 case RTN_PROHIBIT:
1657 ra->dest = RTD_PROHIBIT;
1658 break;
1659 /* FIXME: What about RTN_THROW? */
1660 default:
1661 SKIP("type %d\n", i->rtm_type);
1662 return;
1663 }
1664
1665 #ifdef HAVE_MPLS_KERNEL
1666 int labels = 0;
1667 if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !ra->nh.next)
1668 labels = rta_get_mpls(a[RTA_NEWDST], ra->nh.label);
1669
1670 if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !ra->nh.next)
1671 {
1672 switch (rta_get_u16(a[RTA_ENCAP_TYPE]))
1673 {
1674 case LWTUNNEL_ENCAP_MPLS:
1675 {
1676 struct rtattr *enca[BIRD_RTA_MAX];
1677 nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
1678 nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
1679 labels = rta_get_mpls(enca[RTA_DST], ra->nh.label);
1680 break;
1681 }
1682 default:
1683 SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE]));
1684 break;
1685 }
1686 }
1687
1688 if (labels < 0)
1689 {
1690 log(L_WARN "KRT: Too long MPLS stack received, ignoring.");
1691 ra->nh.labels = 0;
1692 }
1693 else
1694 ra->nh.labels = labels;
1695 #endif
1696
1697 if (i->rtm_scope != def_scope)
1698 {
1699 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1700 ea->next = ra->eattrs;
1701 ra->eattrs = ea;
1702 ea->flags = EALF_SORTED;
1703 ea->count = 1;
1704 ea->attrs[0].id = EA_KRT_SCOPE;
1705 ea->attrs[0].flags = 0;
1706 ea->attrs[0].type = EAF_TYPE_INT;
1707 ea->attrs[0].u.data = i->rtm_scope;
1708 }
1709
1710 if (a[RTA_PREFSRC])
1711 {
1712 ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]);
1713
1714 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1715 ea->next = ra->eattrs;
1716 ra->eattrs = ea;
1717 ea->flags = EALF_SORTED;
1718 ea->count = 1;
1719 ea->attrs[0].id = EA_KRT_PREFSRC;
1720 ea->attrs[0].flags = 0;
1721 ea->attrs[0].type = EAF_TYPE_IP_ADDRESS;
1722 ea->attrs[0].u.ptr = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps));
1723 ea->attrs[0].u.ptr->length = sizeof(ps);
1724 memcpy(ea->attrs[0].u.ptr->data, &ps, sizeof(ps));
1725 }
1726
1727 if (a[RTA_FLOW])
1728 {
1729 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1730 ea->next = ra->eattrs;
1731 ra->eattrs = ea;
1732 ea->flags = EALF_SORTED;
1733 ea->count = 1;
1734 ea->attrs[0].id = EA_KRT_REALM;
1735 ea->attrs[0].flags = 0;
1736 ea->attrs[0].type = EAF_TYPE_INT;
1737 ea->attrs[0].u.data = rta_get_u32(a[RTA_FLOW]);
1738 }
1739
1740 if (a[RTA_METRICS])
1741 {
1742 u32 metrics[KRT_METRICS_MAX];
1743 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr));
1744 int t, n = 0;
1745
1746 if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0)
1747 {
1748 log(L_ERR "KRT: Received route %N with strange RTA_METRICS attribute", net->n.addr);
1749 return;
1750 }
1751
1752 for (t = 1; t < KRT_METRICS_MAX; t++)
1753 if (metrics[0] & (1 << t))
1754 {
1755 ea->attrs[n].id = EA_CODE(PROTOCOL_KERNEL, KRT_METRICS_OFFSET + t);
1756 ea->attrs[n].flags = 0;
1757 ea->attrs[n].type = EAF_TYPE_INT; /* FIXME: Some are EAF_TYPE_BITFIELD */
1758 ea->attrs[n].u.data = metrics[t];
1759 n++;
1760 }
1761
1762 if (n > 0)
1763 {
1764 ea->next = ra->eattrs;
1765 ea->flags = EALF_SORTED;
1766 ea->count = n;
1767 ra->eattrs = ea;
1768 }
1769 }
1770
1771 /*
1772 * Ideally, now we would send the received route to the rest of kernel code.
1773 * But IPv6 ECMP routes before 4.11 are sent as a sequence of routes, so we
1774 * postpone it and merge next hops until the end of the sequence. Note that
1775 * when doing merging of next hops, we expect the new route to be unipath.
1776 * Otherwise, we ignore additional next hops in nexthop_insert().
1777 */
1778
1779 if (!s->net)
1780 {
1781 /* Store the new route */
1782 s->net = net;
1783 s->attrs = ra;
1784 s->proto = p;
1785 s->new = new;
1786 s->krt_src = krt_src;
1787 s->krt_type = i->rtm_type;
1788 s->krt_proto = i->rtm_protocol;
1789 s->krt_metric = priority;
1790 }
1791 else
1792 {
1793 /* Merge next hops with the stored route */
1794 rta *oa = s->attrs;
1795
1796 struct nexthop *nhs = &oa->nh;
1797 nexthop_insert(&nhs, &ra->nh);
1798
1799 /* Perhaps new nexthop is inserted at the first position */
1800 if (nhs == &ra->nh)
1801 {
1802 /* Swap rtas */
1803 s->attrs = ra;
1804
1805 /* Keep old eattrs */
1806 ra->eattrs = oa->eattrs;
1807 }
1808 }
1809 }
1810
1811 void
1812 krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
1813 {
1814 struct nlmsghdr *h;
1815 struct nl_parse_state s;
1816
1817 nl_parse_begin(&s, 1, 0);
1818 nl_request_dump(AF_INET, RTM_GETROUTE);
1819 while (h = nl_get_scan())
1820 if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
1821 nl_parse_route(&s, h);
1822 else
1823 log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
1824 nl_parse_end(&s);
1825
1826 nl_parse_begin(&s, 1, 1);
1827 nl_request_dump(AF_INET6, RTM_GETROUTE);
1828 while (h = nl_get_scan())
1829 if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
1830 nl_parse_route(&s, h);
1831 else
1832 log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
1833 nl_parse_end(&s);
1834
1835 #ifdef HAVE_MPLS_KERNEL
1836 nl_parse_begin(&s, 1, 1);
1837 nl_request_dump(AF_MPLS, RTM_GETROUTE);
1838 while (h = nl_get_scan())
1839 if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
1840 nl_parse_route(&s, h);
1841 else
1842 log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
1843 nl_parse_end(&s);
1844 #endif
1845 }
1846
1847 /*
1848 * Asynchronous Netlink interface
1849 */
1850
static sock *nl_async_sk; /* BIRD socket for asynchronous notifications; non-NULL once nl_open_async() has set it up */
static byte *nl_async_rx_buffer; /* Receive buffer of NL_RX_SIZE bytes, allocated in nl_open_async() */
1853
1854 static void
1855 nl_async_msg(struct nlmsghdr *h)
1856 {
1857 struct nl_parse_state s;
1858
1859 switch (h->nlmsg_type)
1860 {
1861 case RTM_NEWROUTE:
1862 case RTM_DELROUTE:
1863 DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);
1864 nl_parse_begin(&s, 0, 0);
1865 nl_parse_route(&s, h);
1866 nl_parse_end(&s);
1867 break;
1868 case RTM_NEWLINK:
1869 case RTM_DELLINK:
1870 DBG("KRT: Received async link notification (%d)\n", h->nlmsg_type);
1871 if (kif_proto)
1872 nl_parse_link(h, 0);
1873 break;
1874 case RTM_NEWADDR:
1875 case RTM_DELADDR:
1876 DBG("KRT: Received async address notification (%d)\n", h->nlmsg_type);
1877 if (kif_proto)
1878 nl_parse_addr(h, 0);
1879 break;
1880 default:
1881 DBG("KRT: Received unknown async notification (%d)\n", h->nlmsg_type);
1882 }
1883 }
1884
1885 static int
1886 nl_async_hook(sock *sk, uint size UNUSED)
1887 {
1888 struct iovec iov = { nl_async_rx_buffer, NL_RX_SIZE };
1889 struct sockaddr_nl sa;
1890 struct msghdr m = {
1891 .msg_name = &sa,
1892 .msg_namelen = sizeof(sa),
1893 .msg_iov = &iov,
1894 .msg_iovlen = 1,
1895 };
1896 struct nlmsghdr *h;
1897 int x;
1898 uint len;
1899
1900 x = recvmsg(sk->fd, &m, 0);
1901 if (x < 0)
1902 {
1903 if (errno == ENOBUFS)
1904 {
1905 /*
1906 * Netlink reports some packets have been thrown away.
1907 * One day we might react to it by asking for route table
1908 * scan in near future.
1909 */
1910 log(L_WARN "Kernel dropped some netlink messages, will resync on next scan.");
1911 return 1; /* More data are likely to be ready */
1912 }
1913 else if (errno != EWOULDBLOCK)
1914 log(L_ERR "Netlink recvmsg: %m");
1915 return 0;
1916 }
1917 if (sa.nl_pid) /* It isn't from the kernel */
1918 {
1919 DBG("Non-kernel packet\n");
1920 return 1;
1921 }
1922 h = (void *) nl_async_rx_buffer;
1923 len = x;
1924 if (m.msg_flags & MSG_TRUNC)
1925 {
1926 log(L_WARN "Netlink got truncated asynchronous message");
1927 return 1;
1928 }
1929 while (NLMSG_OK(h, len))
1930 {
1931 nl_async_msg(h);
1932 h = NLMSG_NEXT(h, len);
1933 }
1934 if (len)
1935 log(L_WARN "nl_async_hook: Found packet remnant of size %d", len);
1936 return 1;
1937 }
1938
1939 static void
1940 nl_async_err_hook(sock *sk, int e UNUSED)
1941 {
1942 nl_async_hook(sk, 0);
1943 }
1944
1945 static void
1946 nl_open_async(void)
1947 {
1948 sock *sk;
1949 struct sockaddr_nl sa;
1950 int fd;
1951
1952 if (nl_async_sk)
1953 return;
1954
1955 DBG("KRT: Opening async netlink socket\n");
1956
1957 fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
1958 if (fd < 0)
1959 {
1960 log(L_ERR "Unable to open asynchronous rtnetlink socket: %m");
1961 return;
1962 }
1963
1964 bzero(&sa, sizeof(sa));
1965 sa.nl_family = AF_NETLINK;
1966 sa.nl_groups = RTMGRP_LINK |
1967 RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE |
1968 RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE;
1969
1970 if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
1971 {
1972 log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m");
1973 close(fd);
1974 return;
1975 }
1976
1977 nl_async_rx_buffer = xmalloc(NL_RX_SIZE);
1978
1979 sk = nl_async_sk = sk_new(krt_pool);
1980 sk->type = SK_MAGIC;
1981 sk->rx_hook = nl_async_hook;
1982 sk->err_hook = nl_async_err_hook;
1983 sk->fd = fd;
1984 if (sk_open(sk) < 0)
1985 bug("Netlink: sk_open failed");
1986 }
1987
1988
1989 /*
1990 * Interface to the UNIX krt module
1991 */
1992
/*
 * krt_sys_io_init - set up the data structures shared by all kernel protocols
 *
 * Creates the linear pool used by the route parser and initializes the map
 * of registered kernel tables; both are allocated from krt_pool.
 */
void
krt_sys_io_init(void)
{
  nl_linpool = lp_new_default(krt_pool);
  HASH_INIT(nl_table_map, krt_pool, 6);
}
1999
2000 int
2001 krt_sys_start(struct krt_proto *p)
2002 {
2003 struct krt_proto *old = HASH_FIND(nl_table_map, RTH, p->af, krt_table_id(p));
2004
2005 if (old)
2006 {
2007 log(L_ERR "%s: Kernel table %u already registered by %s",
2008 p->p.name, krt_table_id(p), old->p.name);
2009 return 0;
2010 }
2011
2012 HASH_INSERT2(nl_table_map, RTH, krt_pool, p);
2013
2014 nl_open();
2015 nl_open_async();
2016
2017 return 1;
2018 }
2019
/*
 * krt_sys_shutdown - unregister a kernel protocol instance
 * @p: kernel protocol instance being shut down
 *
 * Removes the instance from the table map filled by krt_sys_start().
 */
void
krt_sys_shutdown(struct krt_proto *p)
{
  HASH_REMOVE2(nl_table_map, RTH, krt_pool, p);
}
2025
2026 int
2027 krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
2028 {
2029 return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric);
2030 }
2031
/*
 * krt_sys_init_config - fill in defaults of the system-dependent options
 * @cf: configuration to initialize
 *
 * The kernel table defaults to RT_TABLE_MAIN and the metric to 32.
 */
void
krt_sys_init_config(struct krt_config *cf)
{
  cf->sys.table_id = RT_TABLE_MAIN;
  cf->sys.metric = 32;
}
2038
/*
 * krt_sys_copy_config - copy the system-dependent options
 * @d: destination configuration
 * @s: source configuration
 *
 * Copies the kernel table id and the metric from @s to @d.
 */
void
krt_sys_copy_config(struct krt_config *d, struct krt_config *s)
{
  d->sys.table_id = s->sys.table_id;
  d->sys.metric = s->sys.metric;
}
2045
/*
 * Names of kernel route metrics, indexed by metric id. Index 0 has no name.
 * krt_sys_get_attr() uses the table both to name the individual metric
 * attributes and, from index 2 on, to name the bits of the lock bitfield.
 */
static const char *krt_metrics_names[KRT_METRICS_MAX] = {
 NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss",
 "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack"
};

/*
 * Names of the bits of the features bitfield, indexed by bit position;
 * positions without a name are NULL.
 */
static const char *krt_features_names[KRT_FEATURES_MAX] = {
 "ecn", NULL, NULL, "allfrag"
};
2054
2055 int
2056 krt_sys_get_attr(eattr *a, byte *buf, int buflen UNUSED)
2057 {
2058 switch (a->id)
2059 {
2060 case EA_KRT_PREFSRC:
2061 bsprintf(buf, "prefsrc");
2062 return GA_NAME;
2063
2064 case EA_KRT_REALM:
2065 bsprintf(buf, "realm");
2066 return GA_NAME;
2067
2068 case EA_KRT_SCOPE:
2069 bsprintf(buf, "scope");
2070 return GA_NAME;
2071
2072 case EA_KRT_LOCK:
2073 buf += bsprintf(buf, "lock:");
2074 ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX);
2075 return GA_FULL;
2076
2077 case EA_KRT_FEATURES:
2078 buf += bsprintf(buf, "features:");
2079 ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX);
2080 return GA_FULL;
2081
2082 default:;
2083 int id = (int)EA_ID(a->id) - KRT_METRICS_OFFSET;
2084 if (id > 0 && id < KRT_METRICS_MAX)
2085 {
2086 bsprintf(buf, "%s", krt_metrics_names[id]);
2087 return GA_NAME;
2088 }
2089
2090 return GA_UNKNOWN;
2091 }
2092 }
2093
2094
2095
/*
 * kif_sys_start - start hook of the interface-scanning protocol
 * @p: not used
 *
 * Opens the netlink sockets, including the asynchronous one.
 */
void
kif_sys_start(struct kif_proto *p UNUSED)
{
  nl_open();
  nl_open_async();
}
2102
/* kif_sys_shutdown - shutdown hook of the interface-scanning protocol; does nothing here */
void
kif_sys_shutdown(struct kif_proto *p UNUSED)
{
}
2107
/* kif_update_sysdep_addr - nothing is updated in this implementation; always returns 0 */
int
kif_update_sysdep_addr(struct iface *i UNUSED)
{
  return 0;
}