]> git.ipfire.org Git - thirdparty/bird.git/blame - sysdep/linux/netlink.c
Nest: Keep multipath next hops sorted
[thirdparty/bird.git] / sysdep / linux / netlink.c
CommitLineData
95616c82
OZ
1/*
2 * BIRD -- Linux Netlink Interface
3 *
4 * (c) 1999--2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
9#include <stdio.h>
f83ce94d 10#include <unistd.h>
95616c82
OZ
11#include <fcntl.h>
12#include <sys/socket.h>
13#include <sys/uio.h>
14#include <errno.h>
15
16#undef LOCAL_DEBUG
17
18#include "nest/bird.h"
19#include "nest/route.h"
20#include "nest/protocol.h"
21#include "nest/iface.h"
22#include "lib/alloca.h"
23#include "lib/timer.h"
24#include "lib/unix.h"
25#include "lib/krt.h"
26#include "lib/socket.h"
27#include "lib/string.h"
9ddbfbdd 28#include "lib/hash.h"
95616c82
OZ
29#include "conf/conf.h"
30
31#include <asm/types.h>
32#include <linux/if.h>
33#include <linux/netlink.h>
34#include <linux/rtnetlink.h>
35
9ddbfbdd 36
95616c82
OZ
37#ifndef MSG_TRUNC /* Hack: Several versions of glibc miss this one :( */
38#define MSG_TRUNC 0x20
39#endif
40
a08a81c6
OZ
41#ifndef IFA_FLAGS
42#define IFA_FLAGS 8
43#endif
44
95616c82
OZ
45#ifndef IFF_LOWER_UP
46#define IFF_LOWER_UP 0x10000
47#endif
48
9ddbfbdd
JMM
49#ifndef RTA_TABLE
50#define RTA_TABLE 15
51#endif
52
53
95616c82
OZ
54/*
55 * Synchronous Netlink interface
56 */
57
58struct nl_sock
59{
60 int fd;
61 u32 seq;
62 byte *rx_buffer; /* Receive buffer */
63 struct nlmsghdr *last_hdr; /* Recently received packet */
ae80a2de 64 uint last_size;
95616c82
OZ
65};
66
67#define NL_RX_SIZE 8192
68
69static struct nl_sock nl_scan = {.fd = -1}; /* Netlink socket for synchronous scan */
70static struct nl_sock nl_req = {.fd = -1}; /* Netlink socket for requests */
71
72static void
73nl_open_sock(struct nl_sock *nl)
74{
75 if (nl->fd < 0)
76 {
77 nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
78 if (nl->fd < 0)
79 die("Unable to open rtnetlink socket: %m");
80 nl->seq = now;
81 nl->rx_buffer = xmalloc(NL_RX_SIZE);
82 nl->last_hdr = NULL;
83 nl->last_size = 0;
84 }
85}
86
87static void
88nl_open(void)
89{
90 nl_open_sock(&nl_scan);
91 nl_open_sock(&nl_req);
92}
93
94static void
95nl_send(struct nl_sock *nl, struct nlmsghdr *nh)
96{
97 struct sockaddr_nl sa;
98
99 memset(&sa, 0, sizeof(sa));
100 sa.nl_family = AF_NETLINK;
101 nh->nlmsg_pid = 0;
102 nh->nlmsg_seq = ++(nl->seq);
103 if (sendto(nl->fd, nh, nh->nlmsg_len, 0, (struct sockaddr *)&sa, sizeof(sa)) < 0)
104 die("rtnetlink sendto: %m");
105 nl->last_hdr = NULL;
106}
107
108static void
86c3eea0 109nl_request_dump(int af, int cmd)
95616c82
OZ
110{
111 struct {
112 struct nlmsghdr nh;
113 struct rtgenmsg g;
641172c6
OZ
114 } req = {
115 .nh.nlmsg_type = cmd,
116 .nh.nlmsg_len = sizeof(req),
117 .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
118 .g.rtgen_family = af
119 };
95616c82
OZ
120 nl_send(&nl_scan, &req.nh);
121}
122
123static struct nlmsghdr *
124nl_get_reply(struct nl_sock *nl)
125{
126 for(;;)
127 {
128 if (!nl->last_hdr)
129 {
130 struct iovec iov = { nl->rx_buffer, NL_RX_SIZE };
131 struct sockaddr_nl sa;
31e9e101
ST
132 struct msghdr m = {
133 .msg_name = &sa,
134 .msg_namelen = sizeof(sa),
135 .msg_iov = &iov,
136 .msg_iovlen = 1,
137 };
95616c82
OZ
138 int x = recvmsg(nl->fd, &m, 0);
139 if (x < 0)
140 die("nl_get_reply: %m");
141 if (sa.nl_pid) /* It isn't from the kernel */
142 {
143 DBG("Non-kernel packet\n");
144 continue;
145 }
146 nl->last_size = x;
147 nl->last_hdr = (void *) nl->rx_buffer;
148 if (m.msg_flags & MSG_TRUNC)
149 bug("nl_get_reply: got truncated reply which should be impossible");
150 }
151 if (NLMSG_OK(nl->last_hdr, nl->last_size))
152 {
153 struct nlmsghdr *h = nl->last_hdr;
154 nl->last_hdr = NLMSG_NEXT(h, nl->last_size);
155 if (h->nlmsg_seq != nl->seq)
156 {
157 log(L_WARN "nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)",
158 h->nlmsg_seq, nl->seq);
159 continue;
160 }
161 return h;
162 }
163 if (nl->last_size)
164 log(L_WARN "nl_get_reply: Found packet remnant of size %d", nl->last_size);
165 nl->last_hdr = NULL;
166 }
167}
168
1123e707 169static struct tbf rl_netlink_err = TBF_DEFAULT_LOG_LIMITS;
95616c82
OZ
170
171static int
172nl_error(struct nlmsghdr *h)
173{
174 struct nlmsgerr *e;
175 int ec;
176
177 if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr)))
178 {
179 log(L_WARN "Netlink: Truncated error message received");
180 return ENOBUFS;
181 }
182 e = (struct nlmsgerr *) NLMSG_DATA(h);
183 ec = -e->error;
184 if (ec)
185 log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec));
186 return ec;
187}
188
189static struct nlmsghdr *
190nl_get_scan(void)
191{
192 struct nlmsghdr *h = nl_get_reply(&nl_scan);
193
194 if (h->nlmsg_type == NLMSG_DONE)
195 return NULL;
196 if (h->nlmsg_type == NLMSG_ERROR)
197 {
198 nl_error(h);
199 return NULL;
200 }
201 return h;
202}
203
204static int
205nl_exchange(struct nlmsghdr *pkt)
206{
207 struct nlmsghdr *h;
208
209 nl_send(&nl_req, pkt);
210 for(;;)
211 {
212 h = nl_get_reply(&nl_req);
213 if (h->nlmsg_type == NLMSG_ERROR)
214 break;
215 log(L_WARN "nl_exchange: Unexpected reply received");
216 }
217 return nl_error(h) ? -1 : 0;
218}
219
220/*
221 * Netlink attributes
222 */
223
224static int nl_attr_len;
225
226static void *
227nl_checkin(struct nlmsghdr *h, int lsize)
228{
229 nl_attr_len = h->nlmsg_len - NLMSG_LENGTH(lsize);
230 if (nl_attr_len < 0)
231 {
232 log(L_ERR "nl_checkin: underrun by %d bytes", -nl_attr_len);
233 return NULL;
234 }
235 return NLMSG_DATA(h);
236}
237
ad276157
JMM
238struct nl_want_attrs {
239 u8 defined:1;
240 u8 checksize:1;
241 u8 size;
242};
243
244
245#define BIRD_IFLA_MAX (IFLA_WIRELESS+1)
246
247static struct nl_want_attrs ifla_attr_want[BIRD_IFLA_MAX] = {
248 [IFLA_IFNAME] = { 1, 0, 0 },
249 [IFLA_MTU] = { 1, 1, sizeof(u32) },
250 [IFLA_WIRELESS] = { 1, 0, 0 },
251};
252
253
e37d2e3e 254#define BIRD_IFA_MAX (IFA_FLAGS+1)
ad276157
JMM
255
256#ifndef IPV6
257static struct nl_want_attrs ifa_attr_want4[BIRD_IFA_MAX] = {
258 [IFA_ADDRESS] = { 1, 1, sizeof(ip4_addr) },
259 [IFA_LOCAL] = { 1, 1, sizeof(ip4_addr) },
260 [IFA_BROADCAST] = { 1, 1, sizeof(ip4_addr) },
e37d2e3e 261 [IFA_FLAGS] = { 1, 1, sizeof(u32) },
ad276157
JMM
262};
263#else
264static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = {
265 [IFA_ADDRESS] = { 1, 1, sizeof(ip6_addr) },
266 [IFA_LOCAL] = { 1, 1, sizeof(ip6_addr) },
e37d2e3e 267 [IFA_FLAGS] = { 1, 1, sizeof(u32) },
ad276157
JMM
268};
269#endif
270
271
272#define BIRD_RTA_MAX (RTA_TABLE+1)
273
274static struct nl_want_attrs mpnh_attr_want4[BIRD_RTA_MAX] = {
275 [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
276};
277
278#ifndef IPV6
279static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = {
280 [RTA_DST] = { 1, 1, sizeof(ip4_addr) },
281 [RTA_OIF] = { 1, 1, sizeof(u32) },
282 [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
283 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
284 [RTA_PREFSRC] = { 1, 1, sizeof(ip4_addr) },
285 [RTA_METRICS] = { 1, 0, 0 },
286 [RTA_MULTIPATH] = { 1, 0, 0 },
287 [RTA_FLOW] = { 1, 1, sizeof(u32) },
288 [RTA_TABLE] = { 1, 1, sizeof(u32) },
289};
290#else
291static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = {
292 [RTA_DST] = { 1, 1, sizeof(ip6_addr) },
293 [RTA_IIF] = { 1, 1, sizeof(u32) },
294 [RTA_OIF] = { 1, 1, sizeof(u32) },
295 [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) },
296 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
297 [RTA_PREFSRC] = { 1, 1, sizeof(ip6_addr) },
298 [RTA_METRICS] = { 1, 0, 0 },
299 [RTA_FLOW] = { 1, 1, sizeof(u32) },
300 [RTA_TABLE] = { 1, 1, sizeof(u32) },
301};
302#endif
303
304
95616c82 305static int
ad276157 306nl_parse_attrs(struct rtattr *a, struct nl_want_attrs *want, struct rtattr **k, int ksize)
95616c82
OZ
307{
308 int max = ksize / sizeof(struct rtattr *);
309 bzero(k, ksize);
ad276157
JMM
310
311 for ( ; RTA_OK(a, nl_attr_len); a = RTA_NEXT(a, nl_attr_len))
95616c82 312 {
ad276157
JMM
313 if ((a->rta_type >= max) || !want[a->rta_type].defined)
314 continue;
315
316 if (want[a->rta_type].checksize && (RTA_PAYLOAD(a) != want[a->rta_type].size))
317 {
318 log(L_ERR "nl_parse_attrs: Malformed message received");
319 return 0;
320 }
321
322 k[a->rta_type] = a;
95616c82 323 }
ad276157 324
95616c82
OZ
325 if (nl_attr_len)
326 {
327 log(L_ERR "nl_parse_attrs: remnant of size %d", nl_attr_len);
328 return 0;
329 }
ad276157
JMM
330
331 return 1;
95616c82
OZ
332}
333
fce764f9 334static inline u32 rta_get_u32(struct rtattr *a)
acb04cfd
OZ
335{ return *(u32 *) RTA_DATA(a); }
336
337static inline ip4_addr rta_get_ip4(struct rtattr *a)
338{ return ip4_ntoh(*(ip4_addr *) RTA_DATA(a)); }
339
340static inline ip6_addr rta_get_ip6(struct rtattr *a)
341{ return ip6_ntoh(*(ip6_addr *) RTA_DATA(a)); }
342
343
9fdf9d29
OZ
344struct rtattr *
345nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint dlen)
95616c82 346{
9fdf9d29
OZ
347 uint pos = NLMSG_ALIGN(h->nlmsg_len);
348 uint len = RTA_LENGTH(dlen);
95616c82
OZ
349
350 if (pos + len > bufsize)
351 bug("nl_add_attr: packet buffer overflow");
9fdf9d29
OZ
352
353 struct rtattr *a = (struct rtattr *)((char *)h + pos);
95616c82
OZ
354 a->rta_type = code;
355 a->rta_len = len;
356 h->nlmsg_len = pos + len;
9fdf9d29
OZ
357
358 if (dlen > 0)
359 memcpy(RTA_DATA(a), data, dlen);
360
361 return a;
95616c82
OZ
362}
363
364static inline void
365nl_add_attr_u32(struct nlmsghdr *h, unsigned bufsize, int code, u32 data)
366{
367 nl_add_attr(h, bufsize, code, &data, 4);
368}
369
370static inline void
371nl_add_attr_ipa(struct nlmsghdr *h, unsigned bufsize, int code, ip_addr ipa)
372{
373 ipa_hton(ipa);
374 nl_add_attr(h, bufsize, code, &ipa, sizeof(ipa));
375}
376
9fdf9d29
OZ
377static inline struct rtattr *
378nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code)
379{
380 return nl_add_attr(h, bufsize, code, NULL, 0);
381}
95616c82
OZ
382
/*
 * Finish nested attribute @a of message @h: its length is set so that it
 * spans from its header up to the current (aligned) end of the message.
 */
static inline void
nl_close_attr(struct nlmsghdr *h, struct rtattr *a)
{
  char *msg_end = (char *) h + NLMSG_ALIGN(h->nlmsg_len);

  a->rta_len = msg_end - (char *) a;
}
388
9fdf9d29
OZ
389static inline struct rtnexthop *
390nl_open_nexthop(struct nlmsghdr *h, uint bufsize)
391{
392 uint pos = NLMSG_ALIGN(h->nlmsg_len);
393 uint len = RTNH_LENGTH(0);
394
395 if (pos + len > bufsize)
396 bug("nl_open_nexthop: packet buffer overflow");
397
398 h->nlmsg_len = pos + len;
399
400 return (void *)h + pos;
401}
402
/*
 * Finish next hop record @nh of message @h: its length is set so that it
 * spans from its header up to the current (aligned) end of the message.
 */
static inline void
nl_close_nexthop(struct nlmsghdr *h, struct rtnexthop *nh)
{
  char *msg_end = (char *) h + NLMSG_ALIGN(h->nlmsg_len);

  nh->rtnh_len = msg_end - (char *) nh;
}
95616c82
OZ
408
409static void
410nl_add_multipath(struct nlmsghdr *h, unsigned bufsize, struct mpnh *nh)
411{
9fdf9d29
OZ
412 struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH);
413
95616c82 414 for (; nh; nh = nh->next)
9fdf9d29
OZ
415 {
416 struct rtnexthop *rtnh = nl_open_nexthop(h, bufsize);
95616c82 417
9fdf9d29
OZ
418 rtnh->rtnh_flags = 0;
419 rtnh->rtnh_hops = nh->weight;
420 rtnh->rtnh_ifindex = nh->iface->index;
95616c82 421
38e835de 422 nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
95616c82 423
9fdf9d29
OZ
424 nl_close_nexthop(h, rtnh);
425 }
426
427 nl_close_attr(h, a);
428}
95616c82
OZ
429
430static struct mpnh *
431nl_parse_multipath(struct krt_proto *p, struct rtattr *ra)
432{
433 /* Temporary buffer for multicast nexthops */
434 static struct mpnh *nh_buffer;
435 static int nh_buf_size; /* in number of structures */
436 static int nh_buf_used;
437
ad276157 438 struct rtattr *a[BIRD_RTA_MAX];
95616c82
OZ
439 struct rtnexthop *nh = RTA_DATA(ra);
440 struct mpnh *rv, *first, **last;
441 int len = RTA_PAYLOAD(ra);
442
443 first = NULL;
444 last = &first;
445 nh_buf_used = 0;
446
447 while (len)
448 {
449 /* Use RTNH_OK(nh,len) ?? */
450 if ((len < sizeof(*nh)) || (len < nh->rtnh_len))
451 return NULL;
452
453 if (nh_buf_used == nh_buf_size)
454 {
455 nh_buf_size = nh_buf_size ? (nh_buf_size * 2) : 4;
456 nh_buffer = xrealloc(nh_buffer, nh_buf_size * sizeof(struct mpnh));
457 }
458 *last = rv = nh_buffer + nh_buf_used++;
459 rv->next = NULL;
460 last = &(rv->next);
461
462 rv->weight = nh->rtnh_hops;
463 rv->iface = if_find_by_index(nh->rtnh_ifindex);
464 if (!rv->iface)
465 return NULL;
466
467 /* Nonexistent RTNH_PAYLOAD ?? */
468 nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0);
ad276157 469 nl_parse_attrs(RTNH_DATA(nh), mpnh_attr_want4, a, sizeof(a));
95616c82
OZ
470 if (a[RTA_GATEWAY])
471 {
95616c82
OZ
472 memcpy(&rv->gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ip_addr));
473 ipa_ntoh(rv->gw);
474
475 neighbor *ng = neigh_find2(&p->p, &rv->gw, rv->iface,
476 (nh->rtnh_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0);
477 if (!ng || (ng->scope == SCOPE_HOST))
478 return NULL;
479 }
480 else
481 return NULL;
482
483 len -= NLMSG_ALIGN(nh->rtnh_len);
484 nh = RTNH_NEXT(nh);
485 }
486
487 return first;
488}
489
9fdf9d29
OZ
490static void
491nl_add_metrics(struct nlmsghdr *h, uint bufsize, u32 *metrics, int max)
492{
493 struct rtattr *a = nl_open_attr(h, bufsize, RTA_METRICS);
494 int t;
495
496 for (t = 1; t < max; t++)
497 if (metrics[0] & (1 << t))
498 nl_add_attr_u32(h, bufsize, t, metrics[t]);
499
500 nl_close_attr(h, a);
501}
502
503static int
504nl_parse_metrics(struct rtattr *hdr, u32 *metrics, int max)
505{
506 struct rtattr *a = RTA_DATA(hdr);
507 int len = RTA_PAYLOAD(hdr);
508
509 metrics[0] = 0;
510 for (; RTA_OK(a, len); a = RTA_NEXT(a, len))
511 {
512 if (a->rta_type == RTA_UNSPEC)
513 continue;
514
515 if (a->rta_type >= max)
516 continue;
517
518 if (RTA_PAYLOAD(a) != 4)
519 return -1;
520
521 metrics[0] |= 1 << a->rta_type;
acb04cfd 522 metrics[a->rta_type] = rta_get_u32(a);
9fdf9d29
OZ
523 }
524
525 if (len > 0)
526 return -1;
527
528 return 0;
529}
530
95616c82
OZ
531
532/*
533 * Scanning of interfaces
534 */
535
536static void
537nl_parse_link(struct nlmsghdr *h, int scan)
538{
539 struct ifinfomsg *i;
ad276157 540 struct rtattr *a[BIRD_IFLA_MAX];
95616c82
OZ
541 int new = h->nlmsg_type == RTM_NEWLINK;
542 struct iface f = {};
543 struct iface *ifi;
544 char *name;
545 u32 mtu;
ae80a2de 546 uint fl;
95616c82 547
ad276157 548 if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFLA_RTA(i), ifla_attr_want, a, sizeof(a)))
95616c82 549 return;
ad276157 550 if (!a[IFLA_IFNAME] || (RTA_PAYLOAD(a[IFLA_IFNAME]) < 2) || !a[IFLA_MTU])
95616c82 551 {
ad276157
JMM
552 /*
553 * IFLA_IFNAME and IFLA_MTU are required, in fact, but there may also come
554 * a message with IFLA_WIRELESS set, where (e.g.) no IFLA_IFNAME exists.
555 * We simply ignore all such messages with IFLA_WIRELESS without notice.
556 */
557
558 if (a[IFLA_WIRELESS])
559 return;
560
561 log(L_ERR "KIF: Malformed message received");
95616c82
OZ
562 return;
563 }
ad276157 564
95616c82 565 name = RTA_DATA(a[IFLA_IFNAME]);
acb04cfd 566 mtu = rta_get_u32(a[IFLA_MTU]);
95616c82
OZ
567
568 ifi = if_find_by_index(i->ifi_index);
569 if (!new)
570 {
571 DBG("KIF: IF%d(%s) goes down\n", i->ifi_index, name);
572 if (!ifi)
573 return;
574
575 if_delete(ifi);
576 }
577 else
578 {
579 DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i->ifi_index, name, mtu, i->ifi_flags);
580 if (ifi && strncmp(ifi->name, name, sizeof(ifi->name)-1))
581 if_delete(ifi);
582
583 strncpy(f.name, name, sizeof(f.name)-1);
584 f.index = i->ifi_index;
585 f.mtu = mtu;
586
587 fl = i->ifi_flags;
588 if (fl & IFF_UP)
589 f.flags |= IF_ADMIN_UP;
590 if (fl & IFF_LOWER_UP)
591 f.flags |= IF_LINK_UP;
592 if (fl & IFF_LOOPBACK) /* Loopback */
593 f.flags |= IF_MULTIACCESS | IF_LOOPBACK | IF_IGNORE;
594 else if (fl & IFF_POINTOPOINT) /* PtP */
595 f.flags |= IF_MULTICAST;
596 else if (fl & IFF_BROADCAST) /* Broadcast */
597 f.flags |= IF_MULTIACCESS | IF_BROADCAST | IF_MULTICAST;
598 else
599 f.flags |= IF_MULTIACCESS; /* NBMA */
3216eb03 600
16a3254c
OZ
601 if (fl & IFF_MULTICAST)
602 f.flags |= IF_MULTICAST;
603
3216eb03
OZ
604 ifi = if_update(&f);
605
606 if (!scan)
607 if_end_partial_update(ifi);
95616c82
OZ
608 }
609}
610
611static void
3216eb03 612nl_parse_addr(struct nlmsghdr *h, int scan)
95616c82
OZ
613{
614 struct ifaddrmsg *i;
ad276157 615 struct rtattr *a[BIRD_IFA_MAX];
95616c82
OZ
616 int new = h->nlmsg_type == RTM_NEWADDR;
617 struct ifa ifa;
618 struct iface *ifi;
619 int scope;
e37d2e3e 620 u32 ifa_flags;
95616c82 621
ad276157 622 if (!(i = nl_checkin(h, sizeof(*i))))
95616c82 623 return;
ad276157
JMM
624
625 switch (i->ifa_family)
626 {
627#ifndef IPV6
628 case AF_INET:
629 if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a)))
630 return;
631 if (!a[IFA_LOCAL])
632 {
633 log(L_ERR "KIF: Malformed message received (missing IFA_LOCAL)");
634 return;
635 }
636 break;
95616c82 637#else
ad276157
JMM
638 case AF_INET6:
639 if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a)))
640 return;
641 break;
95616c82 642#endif
ad276157
JMM
643 default:
644 return;
645 }
646
647 if (!a[IFA_ADDRESS])
95616c82 648 {
ad276157 649 log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
95616c82
OZ
650 return;
651 }
652
e37d2e3e
OZ
653 if (a[IFA_FLAGS])
654 ifa_flags = rta_get_u32(a[IFA_FLAGS]);
655 else
656 ifa_flags = i->ifa_flags;
657
95616c82
OZ
658 ifi = if_find_by_index(i->ifa_index);
659 if (!ifi)
660 {
661 log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
662 return;
663 }
664
665 bzero(&ifa, sizeof(ifa));
666 ifa.iface = ifi;
e37d2e3e 667 if (ifa_flags & IFA_F_SECONDARY)
95616c82
OZ
668 ifa.flags |= IA_SECONDARY;
669
e37d2e3e
OZ
670#ifdef IPV6
671 /* Ignore tentative addresses silently */
672 if (ifa_flags & IFA_F_TENTATIVE)
673 return;
674#endif
675
95616c82
OZ
676 /* IFA_LOCAL can be unset for IPv6 interfaces */
677 memcpy(&ifa.ip, RTA_DATA(a[IFA_LOCAL] ? : a[IFA_ADDRESS]), sizeof(ifa.ip));
678 ipa_ntoh(ifa.ip);
679 ifa.pxlen = i->ifa_prefixlen;
680 if (i->ifa_prefixlen > BITS_PER_IP_ADDRESS)
681 {
682 log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
683 new = 0;
684 }
685 if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS)
686 {
687 ip_addr addr;
688 memcpy(&addr, RTA_DATA(a[IFA_ADDRESS]), sizeof(addr));
689 ipa_ntoh(addr);
690 ifa.prefix = ifa.brd = addr;
691
692 /* It is either a host address or a peer address */
693 if (ipa_equal(ifa.ip, addr))
694 ifa.flags |= IA_HOST;
695 else
696 {
697 ifa.flags |= IA_PEER;
698 ifa.opposite = addr;
699 }
700 }
701 else
702 {
703 ip_addr netmask = ipa_mkmask(ifa.pxlen);
704 ifa.prefix = ipa_and(ifa.ip, netmask);
705 ifa.brd = ipa_or(ifa.ip, ipa_not(netmask));
706 if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS - 1)
707 ifa.opposite = ipa_opposite_m1(ifa.ip);
708
709#ifndef IPV6
710 if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS - 2)
711 ifa.opposite = ipa_opposite_m2(ifa.ip);
712
713 if ((ifi->flags & IF_BROADCAST) && a[IFA_BROADCAST])
714 {
715 ip_addr xbrd;
716 memcpy(&xbrd, RTA_DATA(a[IFA_BROADCAST]), sizeof(xbrd));
717 ipa_ntoh(xbrd);
718 if (ipa_equal(xbrd, ifa.prefix) || ipa_equal(xbrd, ifa.brd))
719 ifa.brd = xbrd;
720 else if (ifi->flags & IF_TMP_DOWN) /* Complain only during the first scan */
721 log(L_ERR "KIF: Invalid broadcast address %I for %s", xbrd, ifi->name);
722 }
723#endif
724 }
725
726 scope = ipa_classify(ifa.ip);
727 if (scope < 0)
728 {
729 log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
730 return;
731 }
732 ifa.scope = scope & IADDR_SCOPE_MASK;
733
734 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %I/%d, brd %I, opp %I\n",
735 ifi->index, ifi->name,
736 new ? "added" : "removed",
737 ifa.ip, ifa.flags, ifa.prefix, ifa.pxlen, ifa.brd, ifa.opposite);
3216eb03 738
95616c82
OZ
739 if (new)
740 ifa_update(&ifa);
741 else
742 ifa_delete(&ifa);
3216eb03
OZ
743
744 if (!scan)
745 if_end_partial_update(ifi);
95616c82
OZ
746}
747
748void
749kif_do_scan(struct kif_proto *p UNUSED)
750{
751 struct nlmsghdr *h;
752
753 if_start_update();
754
86c3eea0 755 nl_request_dump(AF_UNSPEC, RTM_GETLINK);
95616c82
OZ
756 while (h = nl_get_scan())
757 if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)
758 nl_parse_link(h, 1);
759 else
760 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
761
86c3eea0 762 nl_request_dump(BIRD_AF, RTM_GETADDR);
95616c82
OZ
763 while (h = nl_get_scan())
764 if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
3216eb03 765 nl_parse_addr(h, 1);
95616c82
OZ
766 else
767 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
768
769 if_end_update();
770}
771
772/*
773 * Routes
774 */
775
9ddbfbdd
JMM
776static inline u32
777krt_table_id(struct krt_proto *p)
778{
779 return KRT_CF->sys.table_id;
780}
781
782static HASH(struct krt_proto) nl_table_map;
783
784#define RTH_FN(k) u32_hash(k)
785#define RTH_EQ(k1,k2) k1 == k2
786#define RTH_KEY(p) krt_table_id(p)
787#define RTH_NEXT(p) p->sys.hash_next
788
789#define RTH_REHASH rth_rehash
790#define RTH_PARAMS /8, *2, 2, 2, 6, 20
791
792HASH_DEFINE_REHASH_FN(RTH, struct krt_proto)
95616c82
OZ
793
794int
795krt_capable(rte *e)
796{
797 rta *a = e->attrs;
798
799 if (a->cast != RTC_UNICAST)
800 return 0;
801
802 switch (a->dest)
803 {
804 case RTD_ROUTER:
805 case RTD_DEVICE:
806 if (a->iface == NULL)
807 return 0;
808 case RTD_BLACKHOLE:
809 case RTD_UNREACHABLE:
810 case RTD_PROHIBIT:
811 case RTD_MULTIPATH:
812 break;
813 default:
814 return 0;
815 }
816 return 1;
817}
818
819static inline int
820nh_bufsize(struct mpnh *nh)
821{
822 int rv = 0;
823 for (; nh != NULL; nh = nh->next)
9fdf9d29 824 rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr)));
95616c82
OZ
825 return rv;
826}
827
828static int
829nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new)
830{
831 eattr *ea;
832 net *net = e->net;
833 rta *a = e->attrs;
834 struct {
835 struct nlmsghdr h;
836 struct rtmsg r;
9fdf9d29 837 char buf[128 + KRT_METRICS_MAX*8 + nh_bufsize(a->nexthops)];
95616c82
OZ
838 } r;
839
840 DBG("nl_send_route(%I/%d,new=%d)\n", net->n.prefix, net->n.pxlen, new);
841
842 bzero(&r.h, sizeof(r.h));
843 bzero(&r.r, sizeof(r.r));
844 r.h.nlmsg_type = new ? RTM_NEWROUTE : RTM_DELROUTE;
845 r.h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
846 r.h.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | (new ? NLM_F_CREATE|NLM_F_EXCL : 0);
847
848 r.r.rtm_family = BIRD_AF;
849 r.r.rtm_dst_len = net->n.pxlen;
95616c82
OZ
850 r.r.rtm_protocol = RTPROT_BIRD;
851 r.r.rtm_scope = RT_SCOPE_UNIVERSE;
852 nl_add_attr_ipa(&r.h, sizeof(r), RTA_DST, net->n.prefix);
853
9ddbfbdd
JMM
854 if (krt_table_id(p) < 256)
855 r.r.rtm_table = krt_table_id(p);
856 else
857 nl_add_attr_u32(&r.h, sizeof(r), RTA_TABLE, krt_table_id(p));
858
78a2cc28
OZ
859 /* For route delete, we do not specify route attributes */
860 if (!new)
861 return nl_exchange(&r.h);
862
863
95616c82 864 if (ea = ea_find(eattrs, EA_KRT_METRIC))
9fdf9d29 865 nl_add_attr_u32(&r.h, sizeof(r), RTA_PRIORITY, ea->u.data);
95616c82
OZ
866
867 if (ea = ea_find(eattrs, EA_KRT_PREFSRC))
868 nl_add_attr_ipa(&r.h, sizeof(r), RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data);
869
870 if (ea = ea_find(eattrs, EA_KRT_REALM))
871 nl_add_attr_u32(&r.h, sizeof(r), RTA_FLOW, ea->u.data);
872
9fdf9d29
OZ
873
874 u32 metrics[KRT_METRICS_MAX];
875 metrics[0] = 0;
876
877 struct ea_walk_state ews = { .eattrs = eattrs };
878 while (ea = ea_walk(&ews, EA_KRT_METRICS, KRT_METRICS_MAX))
879 {
880 int id = ea->id - EA_KRT_METRICS;
881 metrics[0] |= 1 << id;
882 metrics[id] = ea->u.data;
883 }
884
885 if (metrics[0])
886 nl_add_metrics(&r.h, sizeof(r), metrics, KRT_METRICS_MAX);
887
888
95616c82
OZ
889 /* a->iface != NULL checked in krt_capable() for router and device routes */
890
891 switch (a->dest)
892 {
893 case RTD_ROUTER:
894 r.r.rtm_type = RTN_UNICAST;
895 nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, a->iface->index);
896 nl_add_attr_ipa(&r.h, sizeof(r), RTA_GATEWAY, a->gw);
897 break;
898 case RTD_DEVICE:
899 r.r.rtm_type = RTN_UNICAST;
900 nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, a->iface->index);
901 break;
902 case RTD_BLACKHOLE:
903 r.r.rtm_type = RTN_BLACKHOLE;
904 break;
905 case RTD_UNREACHABLE:
906 r.r.rtm_type = RTN_UNREACHABLE;
907 break;
908 case RTD_PROHIBIT:
909 r.r.rtm_type = RTN_PROHIBIT;
910 break;
911 case RTD_MULTIPATH:
912 r.r.rtm_type = RTN_UNICAST;
913 nl_add_multipath(&r.h, sizeof(r), a->nexthops);
914 break;
915 default:
916 bug("krt_capable inconsistent with nl_send_route");
917 }
918
919 return nl_exchange(&r.h);
920}
921
922void
923krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old, struct ea_list *eattrs)
924{
925 int err = 0;
926
927 /*
928 * NULL for eattr of the old route is a little hack, but we don't
929 * get proper eattrs for old in rt_notify() anyway. NULL means no
930 * extended route attributes and therefore matches if the kernel
931 * route has any of them.
932 */
933
934 if (old)
935 nl_send_route(p, old, NULL, 0);
936
937 if (new)
938 err = nl_send_route(p, new, eattrs, 1);
939
940 if (err < 0)
941 n->n.flags |= KRF_SYNC_ERROR;
942 else
943 n->n.flags &= ~KRF_SYNC_ERROR;
944}
945
946
947#define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
948
949static void
950nl_parse_route(struct nlmsghdr *h, int scan)
951{
952 struct krt_proto *p;
953 struct rtmsg *i;
ad276157 954 struct rtattr *a[BIRD_RTA_MAX];
95616c82
OZ
955 int new = h->nlmsg_type == RTM_NEWROUTE;
956
957 ip_addr dst = IPA_NONE;
958 u32 oif = ~0;
9ddbfbdd 959 u32 table;
95616c82
OZ
960 int src;
961
ad276157 962 if (!(i = nl_checkin(h, sizeof(*i))))
95616c82 963 return;
ad276157
JMM
964
965 switch (i->rtm_family)
95616c82 966 {
ad276157
JMM
967#ifndef IPV6
968 case AF_INET:
969 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want4, a, sizeof(a)))
970 return;
971 break;
972#else
973 case AF_INET6:
974 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a)))
975 return;
976 break;
977#endif
978 default:
979 return;
95616c82
OZ
980 }
981
ad276157 982
95616c82
OZ
983 if (a[RTA_DST])
984 {
985 memcpy(&dst, RTA_DATA(a[RTA_DST]), sizeof(dst));
986 ipa_ntoh(dst);
987 }
988
989 if (a[RTA_OIF])
acb04cfd 990 oif = rta_get_u32(a[RTA_OIF]);
95616c82 991
9ddbfbdd
JMM
992 if (a[RTA_TABLE])
993 table = rta_get_u32(a[RTA_TABLE]);
994 else
995 table = i->rtm_table;
996
997 p = HASH_FIND(nl_table_map, RTH, table); /* Do we know this table? */
998 DBG("KRT: Got %I/%d, type=%d, oif=%d, table=%d, prid=%d, proto=%s\n", dst, i->rtm_dst_len, i->rtm_type, oif, table, i->rtm_protocol, p ? p->p.name : "(none)");
95616c82 999 if (!p)
9ddbfbdd 1000 SKIP("unknown table %d\n", table);
95616c82
OZ
1001
1002
1003#ifdef IPV6
1004 if (a[RTA_IIF])
1005 SKIP("IIF set\n");
1006#else
1007 if (i->rtm_tos != 0) /* We don't support TOS */
1008 SKIP("TOS %02x\n", i->rtm_tos);
1009#endif
1010
1011 if (scan && !new)
1012 SKIP("RTM_DELROUTE in scan\n");
1013
1014 int c = ipa_classify_net(dst);
1015 if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
1016 SKIP("strange class/scope\n");
1017
1018 // ignore rtm_scope, it is not a real scope
1019 // if (i->rtm_scope != RT_SCOPE_UNIVERSE)
1020 // SKIP("scope %u\n", i->rtm_scope);
1021
1022 switch (i->rtm_protocol)
1023 {
1024 case RTPROT_UNSPEC:
1025 SKIP("proto unspec\n");
1026
1027 case RTPROT_REDIRECT:
1028 src = KRT_SRC_REDIRECT;
1029 break;
1030
1031 case RTPROT_KERNEL:
1032 src = KRT_SRC_KERNEL;
1033 return;
1034
1035 case RTPROT_BIRD:
1036 if (!scan)
1037 SKIP("echo\n");
1038 src = KRT_SRC_BIRD;
1039 break;
1040
1041 case RTPROT_BOOT:
1042 default:
1043 src = KRT_SRC_ALIEN;
1044 }
1045
1046 net *net = net_get(p->p.table, dst, i->rtm_dst_len);
1047
1048 rta ra = {
094d2bdb 1049 .src= p->p.main_source,
95616c82
OZ
1050 .source = RTS_INHERIT,
1051 .scope = SCOPE_UNIVERSE,
1052 .cast = RTC_UNICAST
1053 };
1054
1055 switch (i->rtm_type)
1056 {
1057 case RTN_UNICAST:
1058
ad276157 1059 if (a[RTA_MULTIPATH] && (i->rtm_family == AF_INET))
95616c82
OZ
1060 {
1061 ra.dest = RTD_MULTIPATH;
1062 ra.nexthops = nl_parse_multipath(p, a[RTA_MULTIPATH]);
1063 if (!ra.nexthops)
1064 {
1065 log(L_ERR "KRT: Received strange multipath route %I/%d",
1066 net->n.prefix, net->n.pxlen);
1067 return;
1068 }
9fdf9d29 1069
95616c82
OZ
1070 break;
1071 }
1072
1073 ra.iface = if_find_by_index(oif);
1074 if (!ra.iface)
1075 {
1076 log(L_ERR "KRT: Received route %I/%d with unknown ifindex %u",
1077 net->n.prefix, net->n.pxlen, oif);
1078 return;
1079 }
1080
1081 if (a[RTA_GATEWAY])
1082 {
1083 neighbor *ng;
1084 ra.dest = RTD_ROUTER;
1085 memcpy(&ra.gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ra.gw));
1086 ipa_ntoh(ra.gw);
1087
9810d055 1088#ifdef IPV6
95616c82
OZ
1089 /* Silently skip strange 6to4 routes */
1090 if (ipa_in_net(ra.gw, IPA_NONE, 96))
1091 return;
9810d055 1092#endif
95616c82
OZ
1093
1094 ng = neigh_find2(&p->p, &ra.gw, ra.iface,
1095 (i->rtm_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0);
1096 if (!ng || (ng->scope == SCOPE_HOST))
1097 {
1098 log(L_ERR "KRT: Received route %I/%d with strange next-hop %I",
1099 net->n.prefix, net->n.pxlen, ra.gw);
1100 return;
1101 }
1102 }
1103 else
1104 {
1105 ra.dest = RTD_DEVICE;
95616c82
OZ
1106 }
1107
1108 break;
1109 case RTN_BLACKHOLE:
1110 ra.dest = RTD_BLACKHOLE;
1111 break;
1112 case RTN_UNREACHABLE:
1113 ra.dest = RTD_UNREACHABLE;
1114 break;
1115 case RTN_PROHIBIT:
1116 ra.dest = RTD_PROHIBIT;
1117 break;
1118 /* FIXME: What about RTN_THROW? */
1119 default:
1120 SKIP("type %d\n", i->rtm_type);
1121 return;
1122 }
1123
1124 rte *e = rte_get_temp(&ra);
1125 e->net = net;
1126 e->u.krt.src = src;
1127 e->u.krt.proto = i->rtm_protocol;
e86cfd41
OZ
1128 e->u.krt.seen = 0;
1129 e->u.krt.best = 0;
acb04cfd 1130 e->u.krt.metric = 0;
95616c82
OZ
1131
1132 if (a[RTA_PRIORITY])
acb04cfd 1133 e->u.krt.metric = rta_get_u32(a[RTA_PRIORITY]);
95616c82
OZ
1134
1135 if (a[RTA_PREFSRC])
1136 {
1137 ip_addr ps;
1138 memcpy(&ps, RTA_DATA(a[RTA_PREFSRC]), sizeof(ps));
1139 ipa_ntoh(ps);
1140
1141 ea_list *ea = alloca(sizeof(ea_list) + sizeof(eattr));
1142 ea->next = ra.eattrs;
1143 ra.eattrs = ea;
1144 ea->flags = EALF_SORTED;
1145 ea->count = 1;
1146 ea->attrs[0].id = EA_KRT_PREFSRC;
1147 ea->attrs[0].flags = 0;
1148 ea->attrs[0].type = EAF_TYPE_IP_ADDRESS;
1149 ea->attrs[0].u.ptr = alloca(sizeof(struct adata) + sizeof(ps));
1150 ea->attrs[0].u.ptr->length = sizeof(ps);
1151 memcpy(ea->attrs[0].u.ptr->data, &ps, sizeof(ps));
1152 }
1153
1154 if (a[RTA_FLOW])
1155 {
1156 ea_list *ea = alloca(sizeof(ea_list) + sizeof(eattr));
1157 ea->next = ra.eattrs;
1158 ra.eattrs = ea;
1159 ea->flags = EALF_SORTED;
1160 ea->count = 1;
1161 ea->attrs[0].id = EA_KRT_REALM;
1162 ea->attrs[0].flags = 0;
1163 ea->attrs[0].type = EAF_TYPE_INT;
acb04cfd 1164 ea->attrs[0].u.data = rta_get_u32(a[RTA_FLOW]);
95616c82
OZ
1165 }
1166
9fdf9d29
OZ
1167 if (a[RTA_METRICS])
1168 {
1169 u32 metrics[KRT_METRICS_MAX];
1170 ea_list *ea = alloca(sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr));
1171 int t, n = 0;
1172
1173 if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0)
1174 {
1175 log(L_ERR "KRT: Received route %I/%d with strange RTA_METRICS attribute",
1176 net->n.prefix, net->n.pxlen);
1177 return;
1178 }
1179
1180 for (t = 1; t < KRT_METRICS_MAX; t++)
1181 if (metrics[0] & (1 << t))
1182 {
1183 ea->attrs[n].id = EA_CODE(EAP_KRT, KRT_METRICS_OFFSET + t);
1184 ea->attrs[n].flags = 0;
1185 ea->attrs[n].type = EAF_TYPE_INT; /* FIXME: Some are EAF_TYPE_BITFIELD */
1186 ea->attrs[n].u.data = metrics[t];
1187 n++;
1188 }
1189
1190 if (n > 0)
1191 {
1192 ea->next = ra.eattrs;
1193 ea->flags = EALF_SORTED;
1194 ea->count = n;
1195 ra.eattrs = ea;
1196 }
1197 }
1198
95616c82
OZ
1199 if (scan)
1200 krt_got_route(p, e);
1201 else
1202 krt_got_route_async(p, e, new);
1203}
1204
1205void
1206krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
1207{
1208 struct nlmsghdr *h;
1209
86c3eea0 1210 nl_request_dump(BIRD_AF, RTM_GETROUTE);
95616c82
OZ
1211 while (h = nl_get_scan())
1212 if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
1213 nl_parse_route(h, 1);
1214 else
1215 log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
1216}
1217
1218/*
1219 * Asynchronous Netlink interface
1220 */
1221
1222static sock *nl_async_sk; /* BIRD socket for asynchronous notifications */
1223static byte *nl_async_rx_buffer; /* Receive buffer */
1224
1225static void
1226nl_async_msg(struct nlmsghdr *h)
1227{
1228 switch (h->nlmsg_type)
1229 {
1230 case RTM_NEWROUTE:
1231 case RTM_DELROUTE:
1232 DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);
1233 nl_parse_route(h, 0);
1234 break;
1235 case RTM_NEWLINK:
1236 case RTM_DELLINK:
1237 DBG("KRT: Received async link notification (%d)\n", h->nlmsg_type);
1e4891e4
OZ
1238 if (kif_proto)
1239 nl_parse_link(h, 0);
95616c82
OZ
1240 break;
1241 case RTM_NEWADDR:
1242 case RTM_DELADDR:
1243 DBG("KRT: Received async address notification (%d)\n", h->nlmsg_type);
1e4891e4
OZ
1244 if (kif_proto)
1245 nl_parse_addr(h, 0);
95616c82
OZ
1246 break;
1247 default:
1248 DBG("KRT: Received unknown async notification (%d)\n", h->nlmsg_type);
1249 }
1250}
1251
1252static int
1253nl_async_hook(sock *sk, int size UNUSED)
1254{
1255 struct iovec iov = { nl_async_rx_buffer, NL_RX_SIZE };
1256 struct sockaddr_nl sa;
31e9e101
ST
1257 struct msghdr m = {
1258 .msg_name = &sa,
1259 .msg_namelen = sizeof(sa),
1260 .msg_iov = &iov,
1261 .msg_iovlen = 1,
1262 };
95616c82
OZ
1263 struct nlmsghdr *h;
1264 int x;
ae80a2de 1265 uint len;
95616c82
OZ
1266
1267 x = recvmsg(sk->fd, &m, 0);
1268 if (x < 0)
1269 {
1270 if (errno == ENOBUFS)
1271 {
1272 /*
1273 * Netlink reports some packets have been thrown away.
1274 * One day we might react to it by asking for route table
1275 * scan in near future.
1276 */
1277 return 1; /* More data are likely to be ready */
1278 }
1279 else if (errno != EWOULDBLOCK)
1280 log(L_ERR "Netlink recvmsg: %m");
1281 return 0;
1282 }
1283 if (sa.nl_pid) /* It isn't from the kernel */
1284 {
1285 DBG("Non-kernel packet\n");
1286 return 1;
1287 }
1288 h = (void *) nl_async_rx_buffer;
1289 len = x;
1290 if (m.msg_flags & MSG_TRUNC)
1291 {
1292 log(L_WARN "Netlink got truncated asynchronous message");
1293 return 1;
1294 }
1295 while (NLMSG_OK(h, len))
1296 {
1297 nl_async_msg(h);
1298 h = NLMSG_NEXT(h, len);
1299 }
1300 if (len)
1301 log(L_WARN "nl_async_hook: Found packet remnant of size %d", len);
1302 return 1;
1303}
1304
1305static void
1306nl_open_async(void)
1307{
1308 sock *sk;
1309 struct sockaddr_nl sa;
1310 int fd;
95616c82 1311
f83ce94d 1312 if (nl_async_sk)
95616c82 1313 return;
95616c82
OZ
1314
1315 DBG("KRT: Opening async netlink socket\n");
1316
1317 fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
1318 if (fd < 0)
1319 {
1320 log(L_ERR "Unable to open asynchronous rtnetlink socket: %m");
1321 return;
1322 }
1323
1324 bzero(&sa, sizeof(sa));
1325 sa.nl_family = AF_NETLINK;
1326#ifdef IPV6
1327 sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE;
1328#else
1329 sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE;
1330#endif
1331 if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
1332 {
1333 log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m");
f83ce94d 1334 close(fd);
95616c82
OZ
1335 return;
1336 }
1337
f83ce94d
OZ
1338 nl_async_rx_buffer = xmalloc(NL_RX_SIZE);
1339
95616c82
OZ
1340 sk = nl_async_sk = sk_new(krt_pool);
1341 sk->type = SK_MAGIC;
1342 sk->rx_hook = nl_async_hook;
1343 sk->fd = fd;
05476c4d 1344 if (sk_open(sk) < 0)
95616c82 1345 bug("Netlink: sk_open failed");
95616c82
OZ
1346}
1347
9ddbfbdd 1348
95616c82
OZ
1349/*
1350 * Interface to the UNIX krt module
1351 */
1352
95616c82 1353void
9ddbfbdd
JMM
1354krt_sys_io_init(void)
1355{
1356 HASH_INIT(nl_table_map, krt_pool, 6);
1357}
1358
1359int
c6964c30 1360krt_sys_start(struct krt_proto *p)
95616c82 1361{
9ddbfbdd
JMM
1362 struct krt_proto *old = HASH_FIND(nl_table_map, RTH, krt_table_id(p));
1363
1364 if (old)
1365 {
1366 log(L_ERR "%s: Kernel table %u already registered by %s",
1367 p->p.name, krt_table_id(p), old->p.name);
1368 return 0;
1369 }
1370
1371 HASH_INSERT2(nl_table_map, RTH, krt_pool, p);
c6964c30
OZ
1372
1373 nl_open();
1374 nl_open_async();
9ddbfbdd
JMM
1375
1376 return 1;
95616c82
OZ
1377}
1378
1379void
9ddbfbdd 1380krt_sys_shutdown(struct krt_proto *p)
95616c82 1381{
9ddbfbdd 1382 HASH_REMOVE2(nl_table_map, RTH, krt_pool, p);
95616c82
OZ
1383}
1384
1385int
1386krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
1387{
1388 return n->sys.table_id == o->sys.table_id;
1389}
1390
95616c82
OZ
1391void
1392krt_sys_init_config(struct krt_config *cf)
1393{
1394 cf->sys.table_id = RT_TABLE_MAIN;
1395}
1396
1397void
1398krt_sys_copy_config(struct krt_config *d, struct krt_config *s)
1399{
1400 d->sys.table_id = s->sys.table_id;
1401}
1402
9fdf9d29
OZ
1403static const char *krt_metrics_names[KRT_METRICS_MAX] = {
1404 NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss",
1405 "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack"
1406};
1407
1408static const char *krt_features_names[KRT_FEATURES_MAX] = {
1409 "ecn", NULL, NULL, "allfrag"
1410};
1411
1412int
1413krt_sys_get_attr(eattr *a, byte *buf, int buflen UNUSED)
1414{
1415 switch (a->id)
1416 {
1417 case EA_KRT_PREFSRC:
1418 bsprintf(buf, "prefsrc");
1419 return GA_NAME;
1420
1421 case EA_KRT_REALM:
1422 bsprintf(buf, "realm");
1423 return GA_NAME;
1424
1425 case EA_KRT_LOCK:
1426 buf += bsprintf(buf, "lock:");
1427 ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX);
1428 return GA_FULL;
1429
1430 case EA_KRT_FEATURES:
1431 buf += bsprintf(buf, "features:");
1432 ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX);
1433 return GA_FULL;
1434
1435 default:;
1436 int id = (int)EA_ID(a->id) - KRT_METRICS_OFFSET;
1437 if (id > 0 && id < KRT_METRICS_MAX)
1438 {
1439 bsprintf(buf, "%s", krt_metrics_names[id]);
1440 return GA_NAME;
1441 }
1442
1443 return GA_UNKNOWN;
1444 }
1445}
1446
95616c82
OZ
1447
1448
1449void
1450kif_sys_start(struct kif_proto *p UNUSED)
1451{
1452 nl_open();
1453 nl_open_async();
1454}
1455
1456void
1457kif_sys_shutdown(struct kif_proto *p UNUSED)
1458{
1459}