]> git.ipfire.org Git - thirdparty/bird.git/blame - sysdep/linux/netlink.c
KRT: Scan routing tables separately on linux to avoid congestion
[thirdparty/bird.git] / sysdep / linux / netlink.c
CommitLineData
95616c82
OZ
1/*
2 * BIRD -- Linux Netlink Interface
3 *
4 * (c) 1999--2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
a8caff32 9#include <alloca.h>
95616c82 10#include <stdio.h>
f83ce94d 11#include <unistd.h>
95616c82
OZ
12#include <fcntl.h>
13#include <sys/socket.h>
14#include <sys/uio.h>
15#include <errno.h>
16
17#undef LOCAL_DEBUG
18
19#include "nest/bird.h"
20#include "nest/route.h"
21#include "nest/protocol.h"
22#include "nest/iface.h"
4e276a89 23#include "lib/alloca.h"
7152e5ef
JMM
24#include "sysdep/unix/unix.h"
25#include "sysdep/unix/krt.h"
95616c82
OZ
26#include "lib/socket.h"
27#include "lib/string.h"
9ddbfbdd 28#include "lib/hash.h"
95616c82
OZ
29#include "conf/conf.h"
30
31#include <asm/types.h>
32#include <linux/if.h>
33#include <linux/netlink.h>
34#include <linux/rtnetlink.h>
35
6b0f5f68
MJM
36#ifdef HAVE_MPLS_KERNEL
37#include <linux/lwtunnel.h>
38#endif
9ddbfbdd 39
95616c82
OZ
40#ifndef MSG_TRUNC /* Hack: Several versions of glibc miss this one :( */
41#define MSG_TRUNC 0x20
42#endif
43
a08a81c6
OZ
44#ifndef IFA_FLAGS
45#define IFA_FLAGS 8
46#endif
47
95616c82
OZ
48#ifndef IFF_LOWER_UP
49#define IFF_LOWER_UP 0x10000
50#endif
51
9ddbfbdd
JMM
52#ifndef RTA_TABLE
53#define RTA_TABLE 15
54#endif
55
d14f8c3c
JMM
56#ifndef RTA_VIA
57#define RTA_VIA 18
58#endif
59
60#ifndef RTA_NEWDST
61#define RTA_NEWDST 19
62#endif
63
64#ifndef RTA_ENCAP_TYPE
65#define RTA_ENCAP_TYPE 21
66#endif
67
68#ifndef RTA_ENCAP
69#define RTA_ENCAP 22
70#endif
9ddbfbdd 71
8988264a
OZ
72#ifndef NETLINK_GET_STRICT_CHK
73#define NETLINK_GET_STRICT_CHK 12
74#endif
75
8235c474 76#define krt_ipv4(p) ((p)->af == AF_INET)
cc5b93f7 77#define krt_ecmp6(p) ((p)->af == AF_INET6)
2feaa693 78
517d05df
OZ
79const int rt_default_ecmp = 16;
80
2feaa693
OZ
81/*
82 * Structure nl_parse_state keeps state of received route processing. Ideally,
83 * we could just independently parse received Netlink messages and immediately
98bb80a2
OZ
84 * propagate received routes to the rest of BIRD, but older Linux kernel (before
85 * version 4.11) represents and announces IPv6 ECMP routes not as one route with
86 * multiple next hops (like RTA_MULTIPATH in IPv4 ECMP), but as a sequence of
87 * routes with the same prefix. More recent kernels work as with IPv4.
2feaa693
OZ
88 *
89 * Therefore, BIRD keeps currently processed route in nl_parse_state structure
90 * and postpones its propagation until we expect it to be final; i.e., when
91 * non-matching route is received or when the scan ends. When another matching
92 * route is received, it is merged with the already processed route to form an
93 * ECMP route. Note that merging is done only for IPv6 (merge == 1), but the
98bb80a2
OZ
94 * postponing is done in both cases (for simplicity). All IPv4 routes or IPv6
95 * routes with RTA_MULTIPATH set are just considered non-matching.
2feaa693
OZ
96 *
97 * This is ignored for asynchronous notifications (every notification is handled
98 * as a separate route). It is not an issue for our routes, as we ignore such
99 * notifications anyways. But importing alien IPv6 ECMP routes does not work
98bb80a2
OZ
100 * properly with older kernels.
101 *
102 * Whatever the kernel version is, IPv6 ECMP routes are sent as multiple routes
103 * for the same prefix.
2feaa693
OZ
104 */
105
106struct nl_parse_state
107{
108 struct linpool *pool;
109 int scan;
110 int merge;
111
112 net *net;
113 rta *attrs;
114 struct krt_proto *proto;
115 s8 new;
116 s8 krt_src;
117 u8 krt_type;
118 u8 krt_proto;
119 u32 krt_metric;
21f9acd2
OZ
120
121 u32 rta_flow; /* Used during parsing */
2feaa693
OZ
122};
123
95616c82
OZ
124/*
125 * Synchronous Netlink interface
126 */
127
128struct nl_sock
129{
130 int fd;
131 u32 seq;
132 byte *rx_buffer; /* Receive buffer */
133 struct nlmsghdr *last_hdr; /* Recently received packet */
ae80a2de 134 uint last_size;
95616c82
OZ
135};
136
e818f164 137#define NL_RX_SIZE 32768
95616c82 138
2feaa693
OZ
139#define NL_OP_DELETE 0
140#define NL_OP_ADD (NLM_F_CREATE|NLM_F_EXCL)
141#define NL_OP_REPLACE (NLM_F_CREATE|NLM_F_REPLACE)
142#define NL_OP_APPEND (NLM_F_CREATE|NLM_F_APPEND)
143
144static linpool *nl_linpool;
145
95616c82
OZ
146static struct nl_sock nl_scan = {.fd = -1}; /* Netlink socket for synchronous scan */
147static struct nl_sock nl_req = {.fd = -1}; /* Netlink socket for requests */
148
149static void
150nl_open_sock(struct nl_sock *nl)
151{
152 if (nl->fd < 0)
153 {
154 nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
155 if (nl->fd < 0)
156 die("Unable to open rtnetlink socket: %m");
574b2324 157 nl->seq = (u32) (current_time() TO_S); /* Or perhaps random_u32() ? */
95616c82
OZ
158 nl->rx_buffer = xmalloc(NL_RX_SIZE);
159 nl->last_hdr = NULL;
160 nl->last_size = 0;
161 }
162}
163
534d0a4b 164static int
ef614f29 165nl_set_strict_dump(struct nl_sock *nl UNUSED, int strict UNUSED)
e818f164 166{
bbc33f6e 167#ifdef SOL_NETLINK
534d0a4b
OZ
168 return setsockopt(nl->fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &strict, sizeof(strict));
169#else
170 return -1;
bbc33f6e 171#endif
e818f164
OZ
172}
173
81ee6cda
OZ
174static void
175nl_set_rcvbuf(int fd, uint val)
176{
177 if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val)) < 0)
178 log(L_WARN "KRT: Cannot set netlink rx buffer size to %u: %m", val);
179}
180
181static uint
182nl_cfg_rx_buffer_size(struct config *cfg)
183{
184 uint bufsize = 0;
185
186 struct proto_config *pc;
187 WALK_LIST(pc, cfg->protos)
188 if ((pc->protocol == &proto_unix_kernel) && !pc->disabled)
189 bufsize = MAX(bufsize, ((struct krt_config *) pc)->sys.netlink_rx_buffer);
190
191 return bufsize;
192}
193
194
95616c82
OZ
195static void
196nl_open(void)
197{
534d0a4b
OZ
198 if ((nl_scan.fd >= 0) && (nl_req.fd >= 0))
199 return;
200
95616c82
OZ
201 nl_open_sock(&nl_scan);
202 nl_open_sock(&nl_req);
e818f164 203
534d0a4b
OZ
204 if (nl_set_strict_dump(&nl_scan, 1) < 0)
205 {
206 log(L_WARN "KRT: Netlink strict checking failed, will scan all tables at once");
207 krt_use_shared_scan();
208 }
95616c82
OZ
209}
210
211static void
212nl_send(struct nl_sock *nl, struct nlmsghdr *nh)
213{
214 struct sockaddr_nl sa;
215
216 memset(&sa, 0, sizeof(sa));
217 sa.nl_family = AF_NETLINK;
218 nh->nlmsg_pid = 0;
219 nh->nlmsg_seq = ++(nl->seq);
53401bef 220 nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len);
95616c82
OZ
221 if (sendto(nl->fd, nh, nh->nlmsg_len, 0, (struct sockaddr *)&sa, sizeof(sa)) < 0)
222 die("rtnetlink sendto: %m");
223 nl->last_hdr = NULL;
224}
225
226static void
e818f164 227nl_request_dump_link(void)
95616c82
OZ
228{
229 struct {
230 struct nlmsghdr nh;
e818f164 231 struct ifinfomsg ifi;
641172c6 232 } req = {
e818f164
OZ
233 .nh.nlmsg_type = RTM_GETLINK,
234 .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
641172c6 235 .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
e818f164
OZ
236 .nh.nlmsg_seq = ++(nl_scan.seq),
237 .ifi.ifi_family = AF_UNSPEC,
641172c6 238 };
e818f164
OZ
239
240 send(nl_scan.fd, &req, sizeof(req), 0);
241 nl_scan.last_hdr = NULL;
95616c82
OZ
242}
243
e818f164
OZ
244static void
245nl_request_dump_addr(int af)
246{
247 struct {
248 struct nlmsghdr nh;
249 struct ifaddrmsg ifa;
250 } req = {
251 .nh.nlmsg_type = RTM_GETADDR,
252 .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
253 .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
254 .nh.nlmsg_seq = ++(nl_scan.seq),
255 .ifa.ifa_family = af,
256 };
257
258 send(nl_scan.fd, &req, sizeof(req), 0);
259 nl_scan.last_hdr = NULL;
260}
261
262static void
534d0a4b 263nl_request_dump_route(int af, int table_id)
e818f164
OZ
264{
265 struct {
266 struct nlmsghdr nh;
267 struct rtmsg rtm;
534d0a4b
OZ
268 struct rtattr rta;
269 u32 table_id;
e818f164
OZ
270 } req = {
271 .nh.nlmsg_type = RTM_GETROUTE,
272 .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)),
273 .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
274 .nh.nlmsg_seq = ++(nl_scan.seq),
275 .rtm.rtm_family = af,
276 };
277
534d0a4b
OZ
278 if (table_id < 256)
279 req.rtm.rtm_table = table_id;
280 else
281 {
282 req.rta.rta_type = RTA_TABLE;
283 req.rta.rta_len = RTA_LENGTH(4);
284 req.table_id = table_id;
285 req.nh.nlmsg_len = NLMSG_ALIGN(req.nh.nlmsg_len) + req.rta.rta_len;
286 }
287
288 send(nl_scan.fd, &req, req.nh.nlmsg_len, 0);
e818f164
OZ
289 nl_scan.last_hdr = NULL;
290}
291
292
95616c82
OZ
293static struct nlmsghdr *
294nl_get_reply(struct nl_sock *nl)
295{
296 for(;;)
297 {
298 if (!nl->last_hdr)
299 {
300 struct iovec iov = { nl->rx_buffer, NL_RX_SIZE };
301 struct sockaddr_nl sa;
31e9e101
ST
302 struct msghdr m = {
303 .msg_name = &sa,
304 .msg_namelen = sizeof(sa),
305 .msg_iov = &iov,
306 .msg_iovlen = 1,
307 };
95616c82
OZ
308 int x = recvmsg(nl->fd, &m, 0);
309 if (x < 0)
310 die("nl_get_reply: %m");
311 if (sa.nl_pid) /* It isn't from the kernel */
312 {
313 DBG("Non-kernel packet\n");
314 continue;
315 }
316 nl->last_size = x;
317 nl->last_hdr = (void *) nl->rx_buffer;
318 if (m.msg_flags & MSG_TRUNC)
319 bug("nl_get_reply: got truncated reply which should be impossible");
320 }
321 if (NLMSG_OK(nl->last_hdr, nl->last_size))
322 {
323 struct nlmsghdr *h = nl->last_hdr;
324 nl->last_hdr = NLMSG_NEXT(h, nl->last_size);
325 if (h->nlmsg_seq != nl->seq)
326 {
327 log(L_WARN "nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)",
328 h->nlmsg_seq, nl->seq);
329 continue;
330 }
331 return h;
332 }
333 if (nl->last_size)
334 log(L_WARN "nl_get_reply: Found packet remnant of size %d", nl->last_size);
335 nl->last_hdr = NULL;
336 }
337}
338
1123e707 339static struct tbf rl_netlink_err = TBF_DEFAULT_LOG_LIMITS;
95616c82
OZ
340
341static int
2feaa693 342nl_error(struct nlmsghdr *h, int ignore_esrch)
95616c82
OZ
343{
344 struct nlmsgerr *e;
345 int ec;
346
347 if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr)))
348 {
349 log(L_WARN "Netlink: Truncated error message received");
350 return ENOBUFS;
351 }
352 e = (struct nlmsgerr *) NLMSG_DATA(h);
353 ec = -e->error;
2feaa693 354 if (ec && !(ignore_esrch && (ec == ESRCH)))
95616c82
OZ
355 log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec));
356 return ec;
357}
358
359static struct nlmsghdr *
360nl_get_scan(void)
361{
362 struct nlmsghdr *h = nl_get_reply(&nl_scan);
363
364 if (h->nlmsg_type == NLMSG_DONE)
365 return NULL;
366 if (h->nlmsg_type == NLMSG_ERROR)
367 {
2feaa693 368 nl_error(h, 0);
95616c82
OZ
369 return NULL;
370 }
371 return h;
372}
373
374static int
2feaa693 375nl_exchange(struct nlmsghdr *pkt, int ignore_esrch)
95616c82
OZ
376{
377 struct nlmsghdr *h;
378
379 nl_send(&nl_req, pkt);
380 for(;;)
381 {
382 h = nl_get_reply(&nl_req);
383 if (h->nlmsg_type == NLMSG_ERROR)
384 break;
385 log(L_WARN "nl_exchange: Unexpected reply received");
386 }
2feaa693 387 return nl_error(h, ignore_esrch) ? -1 : 0;
95616c82
OZ
388}
389
390/*
391 * Netlink attributes
392 */
393
394static int nl_attr_len;
395
396static void *
397nl_checkin(struct nlmsghdr *h, int lsize)
398{
399 nl_attr_len = h->nlmsg_len - NLMSG_LENGTH(lsize);
400 if (nl_attr_len < 0)
401 {
402 log(L_ERR "nl_checkin: underrun by %d bytes", -nl_attr_len);
403 return NULL;
404 }
405 return NLMSG_DATA(h);
406}
407
ad276157
JMM
408struct nl_want_attrs {
409 u8 defined:1;
410 u8 checksize:1;
411 u8 size;
412};
413
414
415#define BIRD_IFLA_MAX (IFLA_WIRELESS+1)
416
417static struct nl_want_attrs ifla_attr_want[BIRD_IFLA_MAX] = {
418 [IFLA_IFNAME] = { 1, 0, 0 },
419 [IFLA_MTU] = { 1, 1, sizeof(u32) },
943478b0 420 [IFLA_MASTER] = { 1, 1, sizeof(u32) },
ad276157
JMM
421 [IFLA_WIRELESS] = { 1, 0, 0 },
422};
423
29a64162 424
e37d2e3e 425#define BIRD_IFA_MAX (IFA_FLAGS+1)
ad276157 426
ad276157
JMM
427static struct nl_want_attrs ifa_attr_want4[BIRD_IFA_MAX] = {
428 [IFA_ADDRESS] = { 1, 1, sizeof(ip4_addr) },
429 [IFA_LOCAL] = { 1, 1, sizeof(ip4_addr) },
430 [IFA_BROADCAST] = { 1, 1, sizeof(ip4_addr) },
62e64905 431 [IFA_FLAGS] = { 1, 1, sizeof(u32) },
ad276157 432};
29a64162 433
ad276157
JMM
434static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = {
435 [IFA_ADDRESS] = { 1, 1, sizeof(ip6_addr) },
436 [IFA_LOCAL] = { 1, 1, sizeof(ip6_addr) },
e37d2e3e 437 [IFA_FLAGS] = { 1, 1, sizeof(u32) },
ad276157 438};
29a64162 439
ad276157 440
d14f8c3c 441#define BIRD_RTA_MAX (RTA_ENCAP+1)
ad276157 442
4e276a89 443static struct nl_want_attrs nexthop_attr_want4[BIRD_RTA_MAX] = {
ad276157 444 [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
53401bef 445 [RTA_VIA] = { 1, 0, 0 },
21f9acd2 446 [RTA_FLOW] = { 1, 1, sizeof(u32) },
d14f8c3c
JMM
447 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
448 [RTA_ENCAP] = { 1, 0, 0 },
449};
450
4ff15a75 451static struct nl_want_attrs nexthop_attr_want6[BIRD_RTA_MAX] = {
98bb80a2 452 [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) },
53401bef 453 [RTA_VIA] = { 1, 0, 0 },
21f9acd2 454 [RTA_FLOW] = { 1, 1, sizeof(u32) },
4ff15a75
OZ
455 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
456 [RTA_ENCAP] = { 1, 0, 0 },
457};
458
6b0f5f68 459#ifdef HAVE_MPLS_KERNEL
f1b5f179
KY
460static struct nl_want_attrs nexthop_attr_want_mpls[BIRD_RTA_MAX] = {
461 [RTA_VIA] = { 1, 0, 0 },
462 [RTA_NEWDST] = { 1, 0, 0 },
463};
464
d14f8c3c
JMM
465static struct nl_want_attrs encap_mpls_want[BIRD_RTA_MAX] = {
466 [RTA_DST] = { 1, 0, 0 },
ad276157 467};
6b0f5f68 468#endif
ad276157 469
ad276157
JMM
470static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = {
471 [RTA_DST] = { 1, 1, sizeof(ip4_addr) },
472 [RTA_OIF] = { 1, 1, sizeof(u32) },
473 [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
474 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
475 [RTA_PREFSRC] = { 1, 1, sizeof(ip4_addr) },
476 [RTA_METRICS] = { 1, 0, 0 },
477 [RTA_MULTIPATH] = { 1, 0, 0 },
478 [RTA_FLOW] = { 1, 1, sizeof(u32) },
479 [RTA_TABLE] = { 1, 1, sizeof(u32) },
53401bef 480 [RTA_VIA] = { 1, 0, 0 },
d14f8c3c
JMM
481 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
482 [RTA_ENCAP] = { 1, 0, 0 },
ad276157 483};
29a64162 484
ad276157
JMM
485static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = {
486 [RTA_DST] = { 1, 1, sizeof(ip6_addr) },
be17805c 487 [RTA_SRC] = { 1, 1, sizeof(ip6_addr) },
ad276157
JMM
488 [RTA_IIF] = { 1, 1, sizeof(u32) },
489 [RTA_OIF] = { 1, 1, sizeof(u32) },
490 [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) },
491 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
492 [RTA_PREFSRC] = { 1, 1, sizeof(ip6_addr) },
493 [RTA_METRICS] = { 1, 0, 0 },
98bb80a2 494 [RTA_MULTIPATH] = { 1, 0, 0 },
ad276157
JMM
495 [RTA_FLOW] = { 1, 1, sizeof(u32) },
496 [RTA_TABLE] = { 1, 1, sizeof(u32) },
53401bef 497 [RTA_VIA] = { 1, 0, 0 },
d14f8c3c
JMM
498 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
499 [RTA_ENCAP] = { 1, 0, 0 },
500};
501
6b0f5f68 502#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
503static struct nl_want_attrs rtm_attr_want_mpls[BIRD_RTA_MAX] = {
504 [RTA_DST] = { 1, 1, sizeof(u32) },
505 [RTA_IIF] = { 1, 1, sizeof(u32) },
506 [RTA_OIF] = { 1, 1, sizeof(u32) },
507 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
508 [RTA_METRICS] = { 1, 0, 0 },
f1b5f179 509 [RTA_MULTIPATH] = { 1, 0, 0 },
d14f8c3c
JMM
510 [RTA_FLOW] = { 1, 1, sizeof(u32) },
511 [RTA_TABLE] = { 1, 1, sizeof(u32) },
512 [RTA_VIA] = { 1, 0, 0 },
513 [RTA_NEWDST] = { 1, 0, 0 },
ad276157 514};
6b0f5f68 515#endif
ad276157
JMM
516
517
95616c82 518static int
ad276157 519nl_parse_attrs(struct rtattr *a, struct nl_want_attrs *want, struct rtattr **k, int ksize)
95616c82
OZ
520{
521 int max = ksize / sizeof(struct rtattr *);
522 bzero(k, ksize);
ad276157
JMM
523
524 for ( ; RTA_OK(a, nl_attr_len); a = RTA_NEXT(a, nl_attr_len))
95616c82 525 {
ad276157
JMM
526 if ((a->rta_type >= max) || !want[a->rta_type].defined)
527 continue;
528
529 if (want[a->rta_type].checksize && (RTA_PAYLOAD(a) != want[a->rta_type].size))
530 {
9b136840 531 log(L_ERR "nl_parse_attrs: Malformed attribute received");
ad276157
JMM
532 return 0;
533 }
534
535 k[a->rta_type] = a;
95616c82 536 }
ad276157 537
95616c82
OZ
538 if (nl_attr_len)
539 {
540 log(L_ERR "nl_parse_attrs: remnant of size %d", nl_attr_len);
541 return 0;
542 }
ad276157
JMM
543
544 return 1;
95616c82
OZ
545}
546
d14f8c3c
JMM
547static inline u16 rta_get_u16(struct rtattr *a)
548{ return *(u16 *) RTA_DATA(a); }
549
fce764f9 550static inline u32 rta_get_u32(struct rtattr *a)
acb04cfd
OZ
551{ return *(u32 *) RTA_DATA(a); }
552
553static inline ip4_addr rta_get_ip4(struct rtattr *a)
554{ return ip4_ntoh(*(ip4_addr *) RTA_DATA(a)); }
555
556static inline ip6_addr rta_get_ip6(struct rtattr *a)
557{ return ip6_ntoh(*(ip6_addr *) RTA_DATA(a)); }
558
9b136840
JMM
559static inline ip_addr rta_get_ipa(struct rtattr *a)
560{
561 if (RTA_PAYLOAD(a) == sizeof(ip4_addr))
562 return ipa_from_ip4(rta_get_ip4(a));
563 else
564 return ipa_from_ip6(rta_get_ip6(a));
565}
acb04cfd 566
6b0f5f68 567#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
568static inline ip_addr rta_get_via(struct rtattr *a)
569{
570 struct rtvia *v = RTA_DATA(a);
571 switch(v->rtvia_family) {
572 case AF_INET: return ipa_from_ip4(ip4_ntoh(*(ip4_addr *) v->rtvia_addr));
573 case AF_INET6: return ipa_from_ip6(ip6_ntoh(*(ip6_addr *) v->rtvia_addr));
574 }
575 return IPA_NONE;
576}
577
578static u32 rta_mpls_stack[MPLS_MAX_LABEL_STACK];
579static inline int rta_get_mpls(struct rtattr *a, u32 *stack)
580{
2eaf65ec
OZ
581 if (!a)
582 return 0;
583
d14f8c3c
JMM
584 if (RTA_PAYLOAD(a) % 4)
585 log(L_WARN "KRT: Strange length of received MPLS stack: %u", RTA_PAYLOAD(a));
586
2eaf65ec
OZ
587 int labels = mpls_get(RTA_DATA(a), RTA_PAYLOAD(a) & ~0x3, stack);
588
589 if (labels < 0)
590 {
591 log(L_WARN "KRT: Too long MPLS stack received, ignoring");
592 labels = 0;
593 }
594
595 return labels;
d14f8c3c 596}
6b0f5f68 597#endif
d14f8c3c 598
9fdf9d29
OZ
599struct rtattr *
600nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint dlen)
95616c82 601{
9fdf9d29
OZ
602 uint pos = NLMSG_ALIGN(h->nlmsg_len);
603 uint len = RTA_LENGTH(dlen);
95616c82
OZ
604
605 if (pos + len > bufsize)
606 bug("nl_add_attr: packet buffer overflow");
9fdf9d29
OZ
607
608 struct rtattr *a = (struct rtattr *)((char *)h + pos);
95616c82
OZ
609 a->rta_type = code;
610 a->rta_len = len;
611 h->nlmsg_len = pos + len;
9fdf9d29
OZ
612
613 if (dlen > 0)
614 memcpy(RTA_DATA(a), data, dlen);
615
616 return a;
95616c82
OZ
617}
618
d14f8c3c
JMM
619static inline struct rtattr *
620nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code)
621{
622 return nl_add_attr(h, bufsize, code, NULL, 0);
623}
624
625static inline void
626nl_close_attr(struct nlmsghdr *h, struct rtattr *a)
627{
628 a->rta_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)a;
629}
630
631static inline void
632nl_add_attr_u16(struct nlmsghdr *h, uint bufsize, int code, u16 data)
633{
634 nl_add_attr(h, bufsize, code, &data, 2);
635}
636
95616c82 637static inline void
29a64162 638nl_add_attr_u32(struct nlmsghdr *h, uint bufsize, int code, u32 data)
95616c82
OZ
639{
640 nl_add_attr(h, bufsize, code, &data, 4);
641}
642
643static inline void
29a64162 644nl_add_attr_ip4(struct nlmsghdr *h, uint bufsize, int code, ip4_addr ip4)
95616c82 645{
29a64162
OZ
646 ip4 = ip4_hton(ip4);
647 nl_add_attr(h, bufsize, code, &ip4, sizeof(ip4));
648}
649
650static inline void
651nl_add_attr_ip6(struct nlmsghdr *h, uint bufsize, int code, ip6_addr ip6)
652{
653 ip6 = ip6_hton(ip6);
654 nl_add_attr(h, bufsize, code, &ip6, sizeof(ip6));
655}
656
657static inline void
658nl_add_attr_ipa(struct nlmsghdr *h, uint bufsize, int code, ip_addr ipa)
659{
660 if (ipa_is_ip4(ipa))
661 nl_add_attr_ip4(h, bufsize, code, ipa_to_ip4(ipa));
9b136840 662 else
29a64162 663 nl_add_attr_ip6(h, bufsize, code, ipa_to_ip6(ipa));
95616c82
OZ
664}
665
6b0f5f68 666#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
667static inline void
668nl_add_attr_mpls(struct nlmsghdr *h, uint bufsize, int code, int len, u32 *stack)
9fdf9d29 669{
d14f8c3c
JMM
670 char buf[len*4];
671 mpls_put(buf, len, stack);
672 nl_add_attr(h, bufsize, code, buf, len*4);
9fdf9d29 673}
95616c82
OZ
674
675static inline void
d14f8c3c 676nl_add_attr_mpls_encap(struct nlmsghdr *h, uint bufsize, int len, u32 *stack)
95616c82 677{
d14f8c3c
JMM
678 nl_add_attr_u16(h, bufsize, RTA_ENCAP_TYPE, LWTUNNEL_ENCAP_MPLS);
679
680 struct rtattr *nest = nl_open_attr(h, bufsize, RTA_ENCAP);
681 nl_add_attr_mpls(h, bufsize, RTA_DST, len, stack);
682 nl_close_attr(h, nest);
683}
684
685static inline void
686nl_add_attr_via(struct nlmsghdr *h, uint bufsize, ip_addr ipa)
687{
66acbc8d 688 struct rtvia *via = alloca(sizeof(struct rtvia) + 16);
d14f8c3c 689
62e64905
OZ
690 if (ipa_is_ip4(ipa))
691 {
d14f8c3c 692 via->rtvia_family = AF_INET;
62e64905 693 put_ip4(via->rtvia_addr, ipa_to_ip4(ipa));
66acbc8d 694 nl_add_attr(h, bufsize, RTA_VIA, via, sizeof(struct rtvia) + 4);
62e64905
OZ
695 }
696 else
697 {
d14f8c3c 698 via->rtvia_family = AF_INET6;
62e64905 699 put_ip6(via->rtvia_addr, ipa_to_ip6(ipa));
66acbc8d 700 nl_add_attr(h, bufsize, RTA_VIA, via, sizeof(struct rtvia) + 16);
d14f8c3c 701 }
95616c82 702}
6b0f5f68 703#endif
95616c82 704
9fdf9d29
OZ
705static inline struct rtnexthop *
706nl_open_nexthop(struct nlmsghdr *h, uint bufsize)
707{
708 uint pos = NLMSG_ALIGN(h->nlmsg_len);
709 uint len = RTNH_LENGTH(0);
710
711 if (pos + len > bufsize)
712 bug("nl_open_nexthop: packet buffer overflow");
713
714 h->nlmsg_len = pos + len;
715
716 return (void *)h + pos;
717}
718
/* Finish nexthop record @nh: its length spans up to the aligned end of @h */
static inline void
nl_close_nexthop(struct nlmsghdr *h, struct rtnexthop *nh)
{
  char *end = (char *) h + NLMSG_ALIGN(h->nlmsg_len);
  nh->rtnh_len = end - (char *) nh;
}
95616c82 724
d14f8c3c 725static inline void
6b0f5f68 726nl_add_nexthop(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af UNUSED)
d14f8c3c 727{
6b0f5f68 728#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
729 if (nh->labels > 0)
730 if (af == AF_MPLS)
731 nl_add_attr_mpls(h, bufsize, RTA_NEWDST, nh->labels, nh->label);
732 else
733 nl_add_attr_mpls_encap(h, bufsize, nh->labels, nh->label);
734
735 if (ipa_nonzero(nh->gw))
53401bef
OZ
736 {
737 if (af == (ipa_is_ip4(nh->gw) ? AF_INET : AF_INET6))
d14f8c3c 738 nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
53401bef
OZ
739 else
740 nl_add_attr_via(h, bufsize, nh->gw);
741 }
6b0f5f68
MJM
742#else
743
744 if (ipa_nonzero(nh->gw))
745 nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
746#endif
d14f8c3c
JMM
747}
748
95616c82 749static void
21f9acd2 750nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af, ea_list *eattrs)
95616c82 751{
9fdf9d29 752 struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH);
21f9acd2 753 eattr *flow = ea_find(eattrs, EA_KRT_REALM);
9fdf9d29 754
95616c82 755 for (; nh; nh = nh->next)
9fdf9d29
OZ
756 {
757 struct rtnexthop *rtnh = nl_open_nexthop(h, bufsize);
95616c82 758
9fdf9d29
OZ
759 rtnh->rtnh_flags = 0;
760 rtnh->rtnh_hops = nh->weight;
761 rtnh->rtnh_ifindex = nh->iface->index;
95616c82 762
d14f8c3c 763 nl_add_nexthop(h, bufsize, nh, af);
95616c82 764
a1f5e514
OZ
765 if (nh->flags & RNF_ONLINK)
766 rtnh->rtnh_flags |= RTNH_F_ONLINK;
767
21f9acd2
OZ
768 /* Our KRT_REALM is per-route, but kernel RTA_FLOW is per-nexthop.
769 Therefore, we need to attach the same attribute to each nexthop. */
770 if (flow)
771 nl_add_attr_u32(h, bufsize, RTA_FLOW, flow->u.data);
772
9fdf9d29
OZ
773 nl_close_nexthop(h, rtnh);
774 }
775
776 nl_close_attr(h, a);
777}
95616c82 778
4e276a89 779static struct nexthop *
f5c8fb5f 780nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, const net_addr *n, struct rtattr *ra, int af, int krt_src)
95616c82 781{
ad276157 782 struct rtattr *a[BIRD_RTA_MAX];
95616c82 783 struct rtnexthop *nh = RTA_DATA(ra);
4e276a89 784 struct nexthop *rv, *first, **last;
3e236955 785 unsigned len = RTA_PAYLOAD(ra);
95616c82
OZ
786
787 first = NULL;
788 last = &first;
95616c82
OZ
789
790 while (len)
791 {
792 /* Use RTNH_OK(nh,len) ?? */
793 if ((len < sizeof(*nh)) || (len < nh->rtnh_len))
77d032c7 794 goto err;
95616c82 795
f5c8fb5f 796 if ((nh->rtnh_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD))
df83f626
OZ
797 goto next;
798
3e792350 799 *last = rv = lp_allocz(s->pool, NEXTHOP_MAX_SIZE);
95616c82
OZ
800 last = &(rv->next);
801
802 rv->weight = nh->rtnh_hops;
803 rv->iface = if_find_by_index(nh->rtnh_ifindex);
804 if (!rv->iface)
77d032c7
OZ
805 {
806 log(L_ERR "KRT: Received route %N with unknown ifindex %u", n, nh->rtnh_ifindex);
807 return NULL;
808 }
95616c82
OZ
809
810 /* Nonexistent RTNH_PAYLOAD ?? */
811 nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0);
98bb80a2
OZ
812 switch (af)
813 {
98bb80a2 814 case AF_INET:
4ff15a75 815 if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want4, a, sizeof(a)))
77d032c7 816 goto err;
98bb80a2 817 break;
4ff15a75 818
98bb80a2 819 case AF_INET6:
4ff15a75 820 if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want6, a, sizeof(a)))
77d032c7 821 goto err;
98bb80a2 822 break;
4ff15a75 823
f1b5f179
KY
824#ifdef HAVE_MPLS_KERNEL
825 case AF_MPLS:
826 if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want_mpls, a, sizeof(a)))
77d032c7 827 goto err;
f1b5f179
KY
828
829 if (a[RTA_NEWDST])
830 rv->labels = rta_get_mpls(a[RTA_NEWDST], rv->label);
831
832 break;
833#endif
834
98bb80a2 835 default:
77d032c7 836 goto err;
98bb80a2
OZ
837 }
838
95616c82 839 if (a[RTA_GATEWAY])
53401bef 840 rv->gw = rta_get_ipa(a[RTA_GATEWAY]);
95616c82 841
21f9acd2
OZ
842 if (a[RTA_FLOW])
843 s->rta_flow = rta_get_u32(a[RTA_FLOW]);
844
53401bef
OZ
845#ifdef HAVE_MPLS_KERNEL
846 if (a[RTA_VIA])
847 rv->gw = rta_get_via(a[RTA_VIA]);
848#endif
849
850 if (ipa_nonzero(rv->gw))
851 {
a1f5e514
OZ
852 if (nh->rtnh_flags & RTNH_F_ONLINK)
853 rv->flags |= RNF_ONLINK;
854
23c212e7 855 neighbor *nbr;
586c1800
OZ
856 nbr = neigh_find(&p->p, rv->gw, rv->iface,
857 (rv->flags & RNF_ONLINK) ? NEF_ONLINK : 0);
23c212e7 858 if (!nbr || (nbr->scope == SCOPE_HOST))
77d032c7
OZ
859 {
860 log(L_ERR "KRT: Received route %N with strange next-hop %I", n, rv->gw);
861 return NULL;
862 }
95616c82 863 }
62e64905 864
6b0f5f68 865#ifdef HAVE_MPLS_KERNEL
2eaf65ec 866 if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE])
6b0f5f68 867 {
77d032c7
OZ
868 if (rta_get_u16(a[RTA_ENCAP_TYPE]) != LWTUNNEL_ENCAP_MPLS)
869 {
870 log(L_WARN "KRT: Received route %N with unknown encapsulation method %d",
871 n, rta_get_u16(a[RTA_ENCAP_TYPE]));
6b0f5f68 872 return NULL;
d14f8c3c
JMM
873 }
874
6b0f5f68
MJM
875 struct rtattr *enca[BIRD_RTA_MAX];
876 nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
877 nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
878 rv->labels = rta_get_mpls(enca[RTA_DST], rv->label);
6b0f5f68
MJM
879 }
880#endif
881
df83f626 882 next:
95616c82
OZ
883 len -= NLMSG_ALIGN(nh->rtnh_len);
884 nh = RTNH_NEXT(nh);
885 }
886
59d3a361
OZ
887 /* Ensure nexthops are sorted to satisfy nest invariant */
888 if (!nexthop_is_sorted(first))
889 first = nexthop_sort(first);
890
95616c82 891 return first;
77d032c7
OZ
892
893err:
894 log(L_ERR "KRT: Received strange multipath route %N", n);
895 return NULL;
95616c82
OZ
896}
897
9fdf9d29
OZ
898static void
899nl_add_metrics(struct nlmsghdr *h, uint bufsize, u32 *metrics, int max)
900{
901 struct rtattr *a = nl_open_attr(h, bufsize, RTA_METRICS);
902 int t;
903
904 for (t = 1; t < max; t++)
905 if (metrics[0] & (1 << t))
906 nl_add_attr_u32(h, bufsize, t, metrics[t]);
907
908 nl_close_attr(h, a);
909}
910
911static int
912nl_parse_metrics(struct rtattr *hdr, u32 *metrics, int max)
913{
914 struct rtattr *a = RTA_DATA(hdr);
915 int len = RTA_PAYLOAD(hdr);
916
917 metrics[0] = 0;
918 for (; RTA_OK(a, len); a = RTA_NEXT(a, len))
919 {
920 if (a->rta_type == RTA_UNSPEC)
921 continue;
922
923 if (a->rta_type >= max)
924 continue;
925
926 if (RTA_PAYLOAD(a) != 4)
927 return -1;
928
929 metrics[0] |= 1 << a->rta_type;
acb04cfd 930 metrics[a->rta_type] = rta_get_u32(a);
9fdf9d29
OZ
931 }
932
933 if (len > 0)
934 return -1;
935
936 return 0;
937}
938
95616c82
OZ
939
940/*
941 * Scanning of interfaces
942 */
943
944static void
945nl_parse_link(struct nlmsghdr *h, int scan)
946{
947 struct ifinfomsg *i;
ad276157 948 struct rtattr *a[BIRD_IFLA_MAX];
95616c82
OZ
949 int new = h->nlmsg_type == RTM_NEWLINK;
950 struct iface f = {};
951 struct iface *ifi;
952 char *name;
943478b0 953 u32 mtu, master = 0;
ae80a2de 954 uint fl;
95616c82 955
ad276157 956 if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFLA_RTA(i), ifla_attr_want, a, sizeof(a)))
95616c82 957 return;
ad276157 958 if (!a[IFLA_IFNAME] || (RTA_PAYLOAD(a[IFLA_IFNAME]) < 2) || !a[IFLA_MTU])
95616c82 959 {
ad276157
JMM
960 /*
961 * IFLA_IFNAME and IFLA_MTU are required, in fact, but there may also come
962 * a message with IFLA_WIRELESS set, where (e.g.) no IFLA_IFNAME exists.
963 * We simply ignore all such messages with IFLA_WIRELESS without notice.
964 */
965
966 if (a[IFLA_WIRELESS])
967 return;
968
969 log(L_ERR "KIF: Malformed message received");
95616c82
OZ
970 return;
971 }
ad276157 972
95616c82 973 name = RTA_DATA(a[IFLA_IFNAME]);
acb04cfd 974 mtu = rta_get_u32(a[IFLA_MTU]);
95616c82 975
943478b0
OZ
976 if (a[IFLA_MASTER])
977 master = rta_get_u32(a[IFLA_MASTER]);
978
95616c82
OZ
979 ifi = if_find_by_index(i->ifi_index);
980 if (!new)
981 {
982 DBG("KIF: IF%d(%s) goes down\n", i->ifi_index, name);
983 if (!ifi)
984 return;
985
986 if_delete(ifi);
987 }
988 else
989 {
990 DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i->ifi_index, name, mtu, i->ifi_flags);
991 if (ifi && strncmp(ifi->name, name, sizeof(ifi->name)-1))
992 if_delete(ifi);
993
994 strncpy(f.name, name, sizeof(f.name)-1);
995 f.index = i->ifi_index;
996 f.mtu = mtu;
997
943478b0
OZ
998 f.master_index = master;
999 f.master = if_find_by_index(master);
1000
95616c82
OZ
1001 fl = i->ifi_flags;
1002 if (fl & IFF_UP)
1003 f.flags |= IF_ADMIN_UP;
1004 if (fl & IFF_LOWER_UP)
1005 f.flags |= IF_LINK_UP;
1006 if (fl & IFF_LOOPBACK) /* Loopback */
1007 f.flags |= IF_MULTIACCESS | IF_LOOPBACK | IF_IGNORE;
1008 else if (fl & IFF_POINTOPOINT) /* PtP */
1009 f.flags |= IF_MULTICAST;
1010 else if (fl & IFF_BROADCAST) /* Broadcast */
1011 f.flags |= IF_MULTIACCESS | IF_BROADCAST | IF_MULTICAST;
1012 else
1013 f.flags |= IF_MULTIACCESS; /* NBMA */
3216eb03 1014
16a3254c
OZ
1015 if (fl & IFF_MULTICAST)
1016 f.flags |= IF_MULTICAST;
1017
3216eb03
OZ
1018 ifi = if_update(&f);
1019
1020 if (!scan)
1021 if_end_partial_update(ifi);
95616c82
OZ
1022 }
1023}
1024
1025static void
9b136840 1026nl_parse_addr4(struct ifaddrmsg *i, int scan, int new)
95616c82 1027{
ad276157 1028 struct rtattr *a[BIRD_IFA_MAX];
95616c82 1029 struct iface *ifi;
e37d2e3e 1030 u32 ifa_flags;
95616c82
OZ
1031 int scope;
1032
9b136840 1033 if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a)))
95616c82 1034 return;
ad276157 1035
9b136840 1036 if (!a[IFA_LOCAL])
ad276157 1037 {
9b136840
JMM
1038 log(L_ERR "KIF: Malformed message received (missing IFA_LOCAL)");
1039 return;
ad276157 1040 }
ad276157 1041 if (!a[IFA_ADDRESS])
95616c82 1042 {
ad276157 1043 log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
95616c82
OZ
1044 return;
1045 }
1046
1047 ifi = if_find_by_index(i->ifa_index);
1048 if (!ifi)
1049 {
1050 log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
1051 return;
1052 }
1053
e37d2e3e
OZ
1054 if (a[IFA_FLAGS])
1055 ifa_flags = rta_get_u32(a[IFA_FLAGS]);
1056 else
1057 ifa_flags = i->ifa_flags;
1058
9b136840 1059 struct ifa ifa;
95616c82
OZ
1060 bzero(&ifa, sizeof(ifa));
1061 ifa.iface = ifi;
cc5b93f7 1062 if (ifa_flags & IFA_F_SECONDARY)
95616c82
OZ
1063 ifa.flags |= IA_SECONDARY;
1064
9b136840
JMM
1065 ifa.ip = rta_get_ipa(a[IFA_LOCAL]);
1066
d7661fbe 1067 if (i->ifa_prefixlen > IP4_MAX_PREFIX_LENGTH)
95616c82
OZ
1068 {
1069 log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
1070 new = 0;
1071 }
d7661fbe 1072 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH)
95616c82 1073 {
9b136840
JMM
1074 ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
1075 net_fill_ip4(&ifa.prefix, rta_get_ip4(a[IFA_ADDRESS]), i->ifa_prefixlen);
95616c82
OZ
1076
1077 /* It is either a host address or a peer address */
9b136840 1078 if (ipa_equal(ifa.ip, ifa.brd))
95616c82
OZ
1079 ifa.flags |= IA_HOST;
1080 else
1081 {
1082 ifa.flags |= IA_PEER;
9b136840 1083 ifa.opposite = ifa.brd;
95616c82
OZ
1084 }
1085 }
1086 else
1087 {
9b136840
JMM
1088 net_fill_ip4(&ifa.prefix, ipa_to_ip4(ifa.ip), i->ifa_prefixlen);
1089 net_normalize(&ifa.prefix);
1090
d7661fbe 1091 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 1)
95616c82
OZ
1092 ifa.opposite = ipa_opposite_m1(ifa.ip);
1093
d7661fbe 1094 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 2)
95616c82
OZ
1095 ifa.opposite = ipa_opposite_m2(ifa.ip);
1096
e2630a49
OZ
1097 if (ifi->flags & IF_BROADCAST)
1098 {
1099 /* If kernel offers us a broadcast address, we trust it */
1100 if (a[IFA_BROADCAST])
1101 ifa.brd = ipa_from_ip4(rta_get_ip4(a[IFA_BROADCAST]));
1102 /* Otherwise we create one (except for /31) */
1103 else if (i->ifa_prefixlen < (IP4_MAX_PREFIX_LENGTH - 1))
1104 ifa.brd = ipa_from_ip4(ip4_or(ipa_to_ip4(ifa.ip),
1105 ip4_not(ip4_mkmask(i->ifa_prefixlen))));
9b136840
JMM
1106 }
1107 }
1108
1109 scope = ipa_classify(ifa.ip);
1110 if (scope < 0)
1111 {
1112 log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
1113 return;
1114 }
1115 ifa.scope = scope & IADDR_SCOPE_MASK;
1116
1117 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
1118 ifi->index, ifi->name,
1119 new ? "added" : "removed",
4659b2ae 1120 ifa.ip, ifa.flags, &ifa.prefix, ifa.brd, ifa.opposite);
9b136840
JMM
1121
1122 if (new)
1123 ifa_update(&ifa);
1124 else
1125 ifa_delete(&ifa);
1126
1127 if (!scan)
1128 if_end_partial_update(ifi);
1129}
1130
1131static void
1132nl_parse_addr6(struct ifaddrmsg *i, int scan, int new)
1133{
1134 struct rtattr *a[BIRD_IFA_MAX];
1135 struct iface *ifi;
cc5b93f7 1136 u32 ifa_flags;
9b136840
JMM
1137 int scope;
1138
1139 if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a)))
1140 return;
1141
1142 if (!a[IFA_ADDRESS])
1143 {
1144 log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
1145 return;
1146 }
1147
1148 ifi = if_find_by_index(i->ifa_index);
1149 if (!ifi)
1150 {
1151 log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
1152 return;
1153 }
1154
cc5b93f7
OZ
1155 if (a[IFA_FLAGS])
1156 ifa_flags = rta_get_u32(a[IFA_FLAGS]);
1157 else
1158 ifa_flags = i->ifa_flags;
1159
9b136840
JMM
1160 struct ifa ifa;
1161 bzero(&ifa, sizeof(ifa));
1162 ifa.iface = ifi;
e37d2e3e 1163 if (ifa_flags & IFA_F_SECONDARY)
9b136840
JMM
1164 ifa.flags |= IA_SECONDARY;
1165
e37d2e3e
OZ
1166 /* Ignore tentative addresses silently */
1167 if (ifa_flags & IFA_F_TENTATIVE)
1168 return;
9b136840 1169
95616c82 1170 /* IFA_LOCAL can be unset for IPv6 interfaces */
9b136840
JMM
1171 ifa.ip = rta_get_ipa(a[IFA_LOCAL] ? : a[IFA_ADDRESS]);
1172
d7661fbe 1173 if (i->ifa_prefixlen > IP6_MAX_PREFIX_LENGTH)
9b136840
JMM
1174 {
1175 log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
1176 new = 0;
1177 }
d7661fbe 1178 if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH)
9b136840
JMM
1179 {
1180 ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
1181 net_fill_ip6(&ifa.prefix, rta_get_ip6(a[IFA_ADDRESS]), i->ifa_prefixlen);
1182
1183 /* It is either a host address or a peer address */
1184 if (ipa_equal(ifa.ip, ifa.brd))
1185 ifa.flags |= IA_HOST;
1186 else
1187 {
1188 ifa.flags |= IA_PEER;
1189 ifa.opposite = ifa.brd;
95616c82 1190 }
9b136840
JMM
1191 }
1192 else
1193 {
1194 net_fill_ip6(&ifa.prefix, ipa_to_ip6(ifa.ip), i->ifa_prefixlen);
1195 net_normalize(&ifa.prefix);
1196
d7661fbe 1197 if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH - 1)
9b136840 1198 ifa.opposite = ipa_opposite_m1(ifa.ip);
95616c82
OZ
1199 }
1200
1201 scope = ipa_classify(ifa.ip);
1202 if (scope < 0)
1203 {
1204 log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
1205 return;
1206 }
1207 ifa.scope = scope & IADDR_SCOPE_MASK;
1208
9b136840 1209 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
95616c82
OZ
1210 ifi->index, ifi->name,
1211 new ? "added" : "removed",
4659b2ae 1212 ifa.ip, ifa.flags, &ifa.prefix, ifa.brd, ifa.opposite);
3216eb03 1213
95616c82
OZ
1214 if (new)
1215 ifa_update(&ifa);
1216 else
1217 ifa_delete(&ifa);
3216eb03
OZ
1218
1219 if (!scan)
1220 if_end_partial_update(ifi);
95616c82
OZ
1221}
1222
9b136840
JMM
1223static void
1224nl_parse_addr(struct nlmsghdr *h, int scan)
1225{
1226 struct ifaddrmsg *i;
1227
1228 if (!(i = nl_checkin(h, sizeof(*i))))
1229 return;
1230
1231 int new = (h->nlmsg_type == RTM_NEWADDR);
1232
1233 switch (i->ifa_family)
1234 {
9b136840
JMM
1235 case AF_INET:
1236 return nl_parse_addr4(i, scan, new);
29a64162 1237
9b136840
JMM
1238 case AF_INET6:
1239 return nl_parse_addr6(i, scan, new);
9b136840
JMM
1240 }
1241}
1242
95616c82
OZ
1243void
1244kif_do_scan(struct kif_proto *p UNUSED)
1245{
1246 struct nlmsghdr *h;
1247
1248 if_start_update();
1249
e818f164 1250 nl_request_dump_link();
95616c82
OZ
1251 while (h = nl_get_scan())
1252 if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)
1253 nl_parse_link(h, 1);
1254 else
1255 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
29a64162 1256
943478b0
OZ
1257 /* Re-resolve master interface for slaves */
1258 struct iface *i;
1259 WALK_LIST(i, iface_list)
1260 if (i->master_index)
1261 {
1262 struct iface f = {
1263 .flags = i->flags,
1264 .mtu = i->mtu,
1265 .index = i->index,
1266 .master_index = i->master_index,
1267 .master = if_find_by_index(i->master_index)
1268 };
1269
1270 if (f.master != i->master)
1271 {
1272 memcpy(f.name, i->name, sizeof(f.name));
1273 if_update(&f);
1274 }
1275 }
1276
e818f164 1277 nl_request_dump_addr(AF_INET);
95616c82
OZ
1278 while (h = nl_get_scan())
1279 if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
3216eb03 1280 nl_parse_addr(h, 1);
95616c82
OZ
1281 else
1282 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
29a64162 1283
e818f164 1284 nl_request_dump_addr(AF_INET6);
d7661fbe
JMM
1285 while (h = nl_get_scan())
1286 if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
1287 nl_parse_addr(h, 1);
1288 else
1289 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
29a64162 1290
95616c82
OZ
1291 if_end_update();
1292}
1293
1294/*
1295 * Routes
1296 */
1297
9ddbfbdd
JMM
1298static inline u32
1299krt_table_id(struct krt_proto *p)
1300{
1301 return KRT_CF->sys.table_id;
1302}
1303
1304static HASH(struct krt_proto) nl_table_map;
1305
29a64162
OZ
1306#define RTH_KEY(p) p->af, krt_table_id(p)
1307#define RTH_NEXT(p) p->sys.hash_next
1308#define RTH_EQ(a1,i1,a2,i2) a1 == a2 && i1 == i2
1309#define RTH_FN(a,i) a ^ u32_hash(i)
9ddbfbdd
JMM
1310
1311#define RTH_REHASH rth_rehash
1312#define RTH_PARAMS /8, *2, 2, 2, 6, 20
1313
1314HASH_DEFINE_REHASH_FN(RTH, struct krt_proto)
95616c82
OZ
1315
1316int
1317krt_capable(rte *e)
1318{
1319 rta *a = e->attrs;
1320
95616c82 1321 switch (a->dest)
62e64905 1322 {
4e276a89 1323 case RTD_UNICAST:
95616c82
OZ
1324 case RTD_BLACKHOLE:
1325 case RTD_UNREACHABLE:
1326 case RTD_PROHIBIT:
62e64905
OZ
1327 return 1;
1328
95616c82
OZ
1329 default:
1330 return 0;
62e64905 1331 }
95616c82
OZ
1332}
1333
1334static inline int
4e276a89 1335nh_bufsize(struct nexthop *nh)
95616c82
OZ
1336{
1337 int rv = 0;
1338 for (; nh != NULL; nh = nh->next)
9fdf9d29 1339 rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr)));
95616c82
OZ
1340 return rv;
1341}
1342
1343static int
13c0be19 1344nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh)
95616c82
OZ
1345{
1346 eattr *ea;
1347 net *net = e->net;
1348 rta *a = e->attrs;
13c0be19 1349 ea_list *eattrs = a->eattrs;
4e276a89 1350 int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(&(a->nh));
4adcb9df 1351 u32 priority = 0;
a8caff32 1352
95616c82
OZ
1353 struct {
1354 struct nlmsghdr h;
1355 struct rtmsg r;
a8caff32
JMM
1356 char buf[0];
1357 } *r;
1358
1359 int rsize = sizeof(*r) + bufsize;
1360 r = alloca(rsize);
95616c82 1361
cc5b93f7 1362 DBG("nl_send_route(%N,op=%x)\n", net->n.addr, op);
95616c82 1363
a8caff32
JMM
1364 bzero(&r->h, sizeof(r->h));
1365 bzero(&r->r, sizeof(r->r));
cc5b93f7 1366 r->h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE;
a8caff32 1367 r->h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
cc5b93f7 1368 r->h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK;
95616c82 1369
a8caff32
JMM
1370 r->r.rtm_family = p->af;
1371 r->r.rtm_dst_len = net_pxlen(net->n.addr);
1372 r->r.rtm_protocol = RTPROT_BIRD;
7074be22 1373 r->r.rtm_scope = RT_SCOPE_NOWHERE;
6b0f5f68 1374#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
1375 if (p->af == AF_MPLS)
1376 {
66acbc8d
OZ
1377 /*
1378 * Kernel MPLS code is a bit picky. We must:
1379 * 1) Always set RT_SCOPE_UNIVERSE and RTN_UNICAST (even for RTM_DELROUTE)
1380 * 2) Never use RTA_PRIORITY
1381 */
1382
d14f8c3c
JMM
1383 u32 label = net_mpls(net->n.addr);
1384 nl_add_attr_mpls(&r->h, rsize, RTA_DST, 1, &label);
66acbc8d
OZ
1385 r->r.rtm_scope = RT_SCOPE_UNIVERSE;
1386 r->r.rtm_type = RTN_UNICAST;
d14f8c3c
JMM
1387 }
1388 else
6b0f5f68 1389#endif
be17805c 1390 {
d14f8c3c 1391 nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(net->n.addr));
95616c82 1392
be17805c
OZ
1393 /* Add source address for IPv6 SADR routes */
1394 if (net->n.addr->type == NET_IP6_SADR)
1395 {
1396 net_addr_ip6_sadr *a = (void *) &net->n.addr;
1397 nl_add_attr_ip6(&r->h, rsize, RTA_SRC, a->src_prefix);
1398 r->r.rtm_src_len = a->src_pxlen;
1399 }
1400 }
1401
2feaa693
OZ
1402 /*
1403 * Strange behavior for RTM_DELROUTE:
1404 * 1) rtm_family is ignored in IPv6, works for IPv4
1405 * 2) not setting RTA_PRIORITY is different from setting default value (on IPv6)
1406 * 3) not setting RTA_PRIORITY is equivalent to setting 0, which is wildcard
1407 */
1408
9ddbfbdd 1409 if (krt_table_id(p) < 256)
a8caff32 1410 r->r.rtm_table = krt_table_id(p);
9ddbfbdd 1411 else
a8caff32 1412 nl_add_attr_u32(&r->h, rsize, RTA_TABLE, krt_table_id(p));
9ddbfbdd 1413
66acbc8d
OZ
1414 if (p->af == AF_MPLS)
1415 priority = 0;
1416 else if (a->source == RTS_DUMMY)
4adcb9df
OZ
1417 priority = e->u.krt.metric;
1418 else if (KRT_CF->sys.metric)
1419 priority = KRT_CF->sys.metric;
1420 else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC)))
1421 priority = ea->u.data;
78a2cc28 1422
4adcb9df 1423 if (priority)
d1b8fe93 1424 nl_add_attr_u32(&r->h, rsize, RTA_PRIORITY, priority);
78a2cc28 1425
2feaa693
OZ
1426 /* For route delete, we do not specify remaining route attributes */
1427 if (op == NL_OP_DELETE)
1428 goto dest;
78a2cc28 1429
6e75d0d2 1430 /* Default scope is LINK for device routes, UNIVERSE otherwise */
66acbc8d
OZ
1431 if (p->af == AF_MPLS)
1432 r->r.rtm_scope = RT_SCOPE_UNIVERSE;
1433 else if (ea = ea_find(eattrs, EA_KRT_SCOPE))
cc5b93f7 1434 r->r.rtm_scope = ea->u.data;
6e75d0d2 1435 else
4e276a89 1436 r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
95616c82
OZ
1437
1438 if (ea = ea_find(eattrs, EA_KRT_PREFSRC))
a8caff32 1439 nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data);
95616c82
OZ
1440
1441 if (ea = ea_find(eattrs, EA_KRT_REALM))
a8caff32 1442 nl_add_attr_u32(&r->h, rsize, RTA_FLOW, ea->u.data);
95616c82 1443
9fdf9d29
OZ
1444
1445 u32 metrics[KRT_METRICS_MAX];
1446 metrics[0] = 0;
1447
1448 struct ea_walk_state ews = { .eattrs = eattrs };
1449 while (ea = ea_walk(&ews, EA_KRT_METRICS, KRT_METRICS_MAX))
1450 {
1451 int id = ea->id - EA_KRT_METRICS;
1452 metrics[0] |= 1 << id;
1453 metrics[id] = ea->u.data;
1454 }
1455
1456 if (metrics[0])
a8caff32 1457 nl_add_metrics(&r->h, rsize, metrics, KRT_METRICS_MAX);
9fdf9d29
OZ
1458
1459
2feaa693 1460dest:
2feaa693 1461 switch (dest)
95616c82 1462 {
4e276a89 1463 case RTD_UNICAST:
a8caff32 1464 r->r.rtm_type = RTN_UNICAST;
4e276a89 1465 if (nh->next && !krt_ecmp6(p))
21f9acd2 1466 nl_add_multipath(&r->h, rsize, nh, p->af, eattrs);
4e276a89
JMM
1467 else
1468 {
1469 nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->iface->index);
d14f8c3c 1470 nl_add_nexthop(&r->h, rsize, nh, p->af);
a1f5e514
OZ
1471
1472 if (nh->flags & RNF_ONLINK)
1473 r->r.rtm_flags |= RTNH_F_ONLINK;
4e276a89 1474 }
95616c82
OZ
1475 break;
1476 case RTD_BLACKHOLE:
a8caff32 1477 r->r.rtm_type = RTN_BLACKHOLE;
95616c82
OZ
1478 break;
1479 case RTD_UNREACHABLE:
a8caff32 1480 r->r.rtm_type = RTN_UNREACHABLE;
95616c82
OZ
1481 break;
1482 case RTD_PROHIBIT:
a8caff32 1483 r->r.rtm_type = RTN_PROHIBIT;
95616c82 1484 break;
2feaa693
OZ
1485 case RTD_NONE:
1486 break;
95616c82
OZ
1487 default:
1488 bug("krt_capable inconsistent with nl_send_route");
1489 }
1490
2feaa693 1491 /* Ignore missing for DELETE */
cc5b93f7 1492 return nl_exchange(&r->h, (op == NL_OP_DELETE));
2feaa693
OZ
1493}
1494
1495static inline int
13c0be19 1496nl_add_rte(struct krt_proto *p, rte *e)
2feaa693
OZ
1497{
1498 rta *a = e->attrs;
1499 int err = 0;
1500
4e276a89 1501 if (krt_ecmp6(p) && a->nh.next)
2feaa693 1502 {
4e276a89 1503 struct nexthop *nh = &(a->nh);
2feaa693 1504
13c0be19 1505 err = nl_send_route(p, e, NL_OP_ADD, RTD_UNICAST, nh);
2feaa693
OZ
1506 if (err < 0)
1507 return err;
1508
1509 for (nh = nh->next; nh; nh = nh->next)
13c0be19 1510 err += nl_send_route(p, e, NL_OP_APPEND, RTD_UNICAST, nh);
2feaa693
OZ
1511
1512 return err;
1513 }
1514
13c0be19 1515 return nl_send_route(p, e, NL_OP_ADD, a->dest, &(a->nh));
2feaa693
OZ
1516}
1517
1518static inline int
13c0be19 1519nl_delete_rte(struct krt_proto *p, rte *e)
2feaa693
OZ
1520{
1521 int err = 0;
1522
1523 /* For IPv6, we just repeatedly request DELETE until we get error */
1524 do
13c0be19 1525 err = nl_send_route(p, e, NL_OP_DELETE, RTD_NONE, NULL);
2feaa693
OZ
1526 while (krt_ecmp6(p) && !err);
1527
1528 return err;
95616c82
OZ
1529}
1530
8235c474
OZ
1531static inline int
1532nl_replace_rte(struct krt_proto *p, rte *e)
1533{
1534 rta *a = e->attrs;
1535 return nl_send_route(p, e, NL_OP_REPLACE, a->dest, &(a->nh));
1536}
1537
1538
95616c82 1539void
cc75b3e1 1540krt_replace_rte(struct krt_proto *p, net *n UNUSED, rte *new, rte *old)
95616c82
OZ
1541{
1542 int err = 0;
1543
1544 /*
8235c474
OZ
1545 * We use NL_OP_REPLACE for IPv4, it has an issue with not checking for
1546 * matching rtm_protocol, but that is OK when dedicated priority is used.
2feaa693 1547 *
8235c474
OZ
1548 * We do not use NL_OP_REPLACE for IPv6, as it has broken semantics for ECMP
1549 * and with some kernel versions ECMP replace crashes kernel. Would need more
1550 * testing and checks for kernel versions.
2feaa693 1551 *
8235c474
OZ
1552 * For IPv6, we use NL_OP_DELETE and then NL_OP_ADD. We also do not trust the
1553 * old route value, so we do not try to optimize IPv6 ECMP reconfigurations.
95616c82
OZ
1554 */
1555
8235c474
OZ
1556 if (krt_ipv4(p) && old && new)
1557 {
1558 err = nl_replace_rte(p, new);
1559 }
1560 else
1561 {
1562 if (old)
1563 nl_delete_rte(p, old);
95616c82 1564
8235c474
OZ
1565 if (new)
1566 err = nl_add_rte(p, new);
1567 }
95616c82 1568
cc75b3e1
OZ
1569 if (new)
1570 {
1571 if (err < 0)
1572 bmap_clear(&p->sync_map, new->id);
1573 else
1574 bmap_set(&p->sync_map, new->id);
1575 }
95616c82
OZ
1576}
1577
2feaa693 1578static int
1187627a 1579nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint priority, uint krt_type, uint rtm_family)
2feaa693 1580{
1187627a
OZ
1581 /* Route merging is used for IPv6 scans */
1582 if (!s->scan || (rtm_family != AF_INET6))
2feaa693
OZ
1583 return 0;
1584
1585 /* Saved and new route must have same network, proto/table, and priority */
1586 if ((s->net != net) || (s->proto != p) || (s->krt_metric != priority))
1587 return 0;
1588
1589 /* Both must be regular unicast routes */
1590 if ((s->krt_type != RTN_UNICAST) || (krt_type != RTN_UNICAST))
1591 return 0;
1592
1593 return 1;
1594}
1595
1596static void
1597nl_announce_route(struct nl_parse_state *s)
1598{
1599 rte *e = rte_get_temp(s->attrs);
1600 e->net = s->net;
1601 e->u.krt.src = s->krt_src;
1602 e->u.krt.proto = s->krt_proto;
1603 e->u.krt.seen = 0;
1604 e->u.krt.best = 0;
1605 e->u.krt.metric = s->krt_metric;
1606
1607 if (s->scan)
1608 krt_got_route(s->proto, e);
1609 else
1610 krt_got_route_async(s->proto, e, s->new);
1611
1612 s->net = NULL;
1613 s->attrs = NULL;
1614 s->proto = NULL;
1615 lp_flush(s->pool);
1616}
1617
1618static inline void
1187627a 1619nl_parse_begin(struct nl_parse_state *s, int scan)
2feaa693
OZ
1620{
1621 memset(s, 0, sizeof (struct nl_parse_state));
1622 s->pool = nl_linpool;
1623 s->scan = scan;
2feaa693
OZ
1624}
1625
1626static inline void
1627nl_parse_end(struct nl_parse_state *s)
1628{
1629 if (s->net)
1630 nl_announce_route(s);
1631}
1632
1633
d0dd1d20
OZ
/*
 * Debug-log why a route is being ignored and return from the enclosing
 * (void) function. SKIP additionally prints the destination and therefore
 * requires a local variable named 'dst' in scope; SKIP0 does not.
 */
#define SKIP0(ARG, ...) do { DBG("KRT: Ignoring route - " ARG, ##__VA_ARGS__); return; } while(0)
#define SKIP(ARG, ...) do { DBG("KRT: Ignoring route %N - " ARG, &dst, ##__VA_ARGS__); return; } while(0)
95616c82
OZ
1636
1637static void
2feaa693 1638nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
95616c82
OZ
1639{
1640 struct krt_proto *p;
1641 struct rtmsg *i;
ad276157 1642 struct rtattr *a[BIRD_RTA_MAX];
95616c82
OZ
1643 int new = h->nlmsg_type == RTM_NEWROUTE;
1644
be17805c 1645 net_addr dst, src = {};
95616c82 1646 u32 oif = ~0;
29a64162 1647 u32 table_id;
2feaa693 1648 u32 priority = 0;
6e75d0d2 1649 u32 def_scope = RT_SCOPE_UNIVERSE;
be17805c 1650 int krt_src;
95616c82 1651
ad276157 1652 if (!(i = nl_checkin(h, sizeof(*i))))
95616c82 1653 return;
ad276157
JMM
1654
1655 switch (i->rtm_family)
95616c82 1656 {
29a64162
OZ
1657 case AF_INET:
1658 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want4, a, sizeof(a)))
1659 return;
1660
1661 if (a[RTA_DST])
1662 net_fill_ip4(&dst, rta_get_ip4(a[RTA_DST]), i->rtm_dst_len);
1663 else
1664 net_fill_ip4(&dst, IP4_NONE, 0);
1665 break;
1666
cc5b93f7
OZ
1667 case AF_INET6:
1668 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a)))
1669 return;
29a64162
OZ
1670
1671 if (a[RTA_DST])
1672 net_fill_ip6(&dst, rta_get_ip6(a[RTA_DST]), i->rtm_dst_len);
1673 else
1674 net_fill_ip6(&dst, IP6_NONE, 0);
be17805c
OZ
1675
1676 if (a[RTA_SRC])
1677 net_fill_ip6(&src, rta_get_ip6(a[RTA_SRC]), i->rtm_src_len);
1678 else
1679 net_fill_ip6(&src, IP6_NONE, 0);
29a64162
OZ
1680 break;
1681
6b0f5f68 1682#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
1683 case AF_MPLS:
1684 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want_mpls, a, sizeof(a)))
1685 return;
1686
ed610044 1687 if (!a[RTA_DST])
d0dd1d20 1688 SKIP0("MPLS route without RTA_DST\n");
ed610044
OZ
1689
1690 if (rta_get_mpls(a[RTA_DST], rta_mpls_stack) != 1)
d0dd1d20 1691 SKIP0("MPLS route with multi-label RTA_DST\n");
ed610044
OZ
1692
1693 net_fill_mpls(&dst, rta_mpls_stack[0]);
d14f8c3c 1694 break;
6b0f5f68 1695#endif
d14f8c3c 1696
29a64162
OZ
1697 default:
1698 return;
95616c82
OZ
1699 }
1700
95616c82 1701 if (a[RTA_OIF])
acb04cfd 1702 oif = rta_get_u32(a[RTA_OIF]);
95616c82 1703
9ddbfbdd 1704 if (a[RTA_TABLE])
29a64162 1705 table_id = rta_get_u32(a[RTA_TABLE]);
9ddbfbdd 1706 else
29a64162 1707 table_id = i->rtm_table;
9ddbfbdd 1708
d0dd1d20
OZ
1709 if (i->rtm_flags & RTM_F_CLONED)
1710 SKIP("cloned\n");
1711
29a64162
OZ
1712 /* Do we know this table? */
1713 p = HASH_FIND(nl_table_map, RTH, i->rtm_family, table_id);
95616c82 1714 if (!p)
4659b2ae 1715 SKIP("unknown table %u\n", table_id);
95616c82 1716
be17805c
OZ
1717 if (a[RTA_SRC] && (p->p.net_type != NET_IP6_SADR))
1718 SKIP("src prefix for non-SADR channel\n");
1719
95616c82
OZ
1720 if (a[RTA_IIF])
1721 SKIP("IIF set\n");
29a64162 1722
95616c82
OZ
1723 if (i->rtm_tos != 0) /* We don't support TOS */
1724 SKIP("TOS %02x\n", i->rtm_tos);
95616c82 1725
2feaa693 1726 if (s->scan && !new)
95616c82
OZ
1727 SKIP("RTM_DELROUTE in scan\n");
1728
2feaa693
OZ
1729 if (a[RTA_PRIORITY])
1730 priority = rta_get_u32(a[RTA_PRIORITY]);
1731
9b136840 1732 int c = net_classify(&dst);
95616c82
OZ
1733 if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
1734 SKIP("strange class/scope\n");
1735
95616c82
OZ
1736 switch (i->rtm_protocol)
1737 {
1738 case RTPROT_UNSPEC:
1739 SKIP("proto unspec\n");
1740
1741 case RTPROT_REDIRECT:
be17805c 1742 krt_src = KRT_SRC_REDIRECT;
95616c82
OZ
1743 break;
1744
1745 case RTPROT_KERNEL:
be17805c 1746 krt_src = KRT_SRC_KERNEL;
95616c82
OZ
1747 return;
1748
1749 case RTPROT_BIRD:
2feaa693 1750 if (!s->scan)
95616c82 1751 SKIP("echo\n");
be17805c 1752 krt_src = KRT_SRC_BIRD;
95616c82
OZ
1753 break;
1754
1755 case RTPROT_BOOT:
1756 default:
be17805c 1757 krt_src = KRT_SRC_ALIEN;
95616c82
OZ
1758 }
1759
be17805c
OZ
1760 net_addr *n = &dst;
1761 if (p->p.net_type == NET_IP6_SADR)
1762 {
1763 n = alloca(sizeof(net_addr_ip6_sadr));
1764 net_fill_ip6_sadr(n, net6_prefix(&dst), net6_pxlen(&dst),
1765 net6_prefix(&src), net6_pxlen(&src));
1766 }
1767
1768 net *net = net_get(p->p.main_channel->table, n);
95616c82 1769
1187627a 1770 if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type, i->rtm_family))
2feaa693
OZ
1771 nl_announce_route(s);
1772
d14f8c3c 1773 rta *ra = lp_allocz(s->pool, RTA_MAX_SIZE);
2feaa693
OZ
1774 ra->src = p->p.main_source;
1775 ra->source = RTS_INHERIT;
1776 ra->scope = SCOPE_UNIVERSE;
95616c82 1777
21f9acd2
OZ
1778 if (a[RTA_FLOW])
1779 s->rta_flow = rta_get_u32(a[RTA_FLOW]);
1780 else
1781 s->rta_flow = 0;
1782
95616c82
OZ
1783 switch (i->rtm_type)
1784 {
1785 case RTN_UNICAST:
62e64905 1786 ra->dest = RTD_UNICAST;
95616c82 1787
98bb80a2 1788 if (a[RTA_MULTIPATH])
4ff15a75 1789 {
f5c8fb5f 1790 struct nexthop *nh = nl_parse_multipath(s, p, n, a[RTA_MULTIPATH], i->rtm_family, krt_src);
4e276a89 1791 if (!nh)
77d032c7 1792 SKIP("strange RTA_MULTIPATH\n");
9fdf9d29 1793
2eaf65ec 1794 nexthop_link(ra, nh);
95616c82
OZ
1795 break;
1796 }
1797
f5c8fb5f 1798 if ((i->rtm_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD))
77d032c7 1799 SKIP("ignore RTNH_F_DEAD\n");
df83f626 1800
4e276a89
JMM
1801 ra->nh.iface = if_find_by_index(oif);
1802 if (!ra->nh.iface)
95616c82 1803 {
fe9f1a6d 1804 log(L_ERR "KRT: Received route %N with unknown ifindex %u", net->n.addr, oif);
95616c82
OZ
1805 return;
1806 }
1807
53401bef
OZ
1808 if (a[RTA_GATEWAY])
1809 ra->nh.gw = rta_get_ipa(a[RTA_GATEWAY]);
1810
6b0f5f68 1811#ifdef HAVE_MPLS_KERNEL
53401bef
OZ
1812 if (a[RTA_VIA])
1813 ra->nh.gw = rta_get_via(a[RTA_VIA]);
6b0f5f68 1814#endif
95616c82 1815
53401bef
OZ
1816 if (ipa_nonzero(ra->nh.gw))
1817 {
95616c82 1818 /* Silently skip strange 6to4 routes */
0bf95f99 1819 const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96);
4e276a89 1820 if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->nh.gw, (net_addr *) &sit))
95616c82
OZ
1821 return;
1822
a1f5e514
OZ
1823 if (i->rtm_flags & RTNH_F_ONLINK)
1824 ra->nh.flags |= RNF_ONLINK;
1825
23c212e7 1826 neighbor *nbr;
586c1800
OZ
1827 nbr = neigh_find(&p->p, ra->nh.gw, ra->nh.iface,
1828 (ra->nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0);
23c212e7 1829 if (!nbr || (nbr->scope == SCOPE_HOST))
95616c82 1830 {
4e276a89
JMM
1831 log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr,
1832 ra->nh.gw);
95616c82
OZ
1833 return;
1834 }
1835 }
95616c82
OZ
1836
1837 break;
1838 case RTN_BLACKHOLE:
2feaa693 1839 ra->dest = RTD_BLACKHOLE;
95616c82
OZ
1840 break;
1841 case RTN_UNREACHABLE:
2feaa693 1842 ra->dest = RTD_UNREACHABLE;
95616c82
OZ
1843 break;
1844 case RTN_PROHIBIT:
2feaa693 1845 ra->dest = RTD_PROHIBIT;
95616c82
OZ
1846 break;
1847 /* FIXME: What about RTN_THROW? */
1848 default:
1849 SKIP("type %d\n", i->rtm_type);
1850 return;
1851 }
1852
6b0f5f68 1853#ifdef HAVE_MPLS_KERNEL
d14f8c3c 1854 if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !ra->nh.next)
2eaf65ec 1855 ra->nh.labels = rta_get_mpls(a[RTA_NEWDST], ra->nh.label);
d14f8c3c
JMM
1856
1857 if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !ra->nh.next)
1858 {
1859 switch (rta_get_u16(a[RTA_ENCAP_TYPE]))
1860 {
1861 case LWTUNNEL_ENCAP_MPLS:
1862 {
1863 struct rtattr *enca[BIRD_RTA_MAX];
1864 nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
1865 nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
2eaf65ec 1866 ra->nh.labels = rta_get_mpls(enca[RTA_DST], ra->nh.label);
d14f8c3c
JMM
1867 break;
1868 }
1869 default:
1870 SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE]));
1871 break;
1872 }
1873 }
6b0f5f68 1874#endif
d14f8c3c 1875
6e75d0d2
OZ
1876 if (i->rtm_scope != def_scope)
1877 {
1878 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1879 ea->next = ra->eattrs;
1880 ra->eattrs = ea;
1881 ea->flags = EALF_SORTED;
1882 ea->count = 1;
1883 ea->attrs[0].id = EA_KRT_SCOPE;
1884 ea->attrs[0].flags = 0;
1885 ea->attrs[0].type = EAF_TYPE_INT;
1886 ea->attrs[0].u.data = i->rtm_scope;
1887 }
95616c82
OZ
1888
1889 if (a[RTA_PREFSRC])
1890 {
9b136840 1891 ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]);
95616c82 1892
2feaa693
OZ
1893 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1894 ea->next = ra->eattrs;
1895 ra->eattrs = ea;
95616c82
OZ
1896 ea->flags = EALF_SORTED;
1897 ea->count = 1;
1898 ea->attrs[0].id = EA_KRT_PREFSRC;
1899 ea->attrs[0].flags = 0;
1900 ea->attrs[0].type = EAF_TYPE_IP_ADDRESS;
4c553c5a
MM
1901
1902 struct adata *ad = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps));
1903 ad->length = sizeof(ps);
1904 memcpy(ad->data, &ps, sizeof(ps));
1905
1906 ea->attrs[0].u.ptr = ad;
95616c82
OZ
1907 }
1908
21f9acd2
OZ
1909 /* Can be set per-route or per-nexthop */
1910 if (s->rta_flow)
95616c82 1911 {
2feaa693
OZ
1912 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1913 ea->next = ra->eattrs;
1914 ra->eattrs = ea;
95616c82
OZ
1915 ea->flags = EALF_SORTED;
1916 ea->count = 1;
1917 ea->attrs[0].id = EA_KRT_REALM;
1918 ea->attrs[0].flags = 0;
1919 ea->attrs[0].type = EAF_TYPE_INT;
21f9acd2 1920 ea->attrs[0].u.data = s->rta_flow;
95616c82
OZ
1921 }
1922
9fdf9d29
OZ
1923 if (a[RTA_METRICS])
1924 {
1925 u32 metrics[KRT_METRICS_MAX];
2feaa693 1926 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr));
9fdf9d29
OZ
1927 int t, n = 0;
1928
1929 if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0)
1930 {
fe9f1a6d 1931 log(L_ERR "KRT: Received route %N with strange RTA_METRICS attribute", net->n.addr);
9fdf9d29
OZ
1932 return;
1933 }
1934
1935 for (t = 1; t < KRT_METRICS_MAX; t++)
1936 if (metrics[0] & (1 << t))
1937 {
ee7e2ffd 1938 ea->attrs[n].id = EA_CODE(PROTOCOL_KERNEL, KRT_METRICS_OFFSET + t);
9fdf9d29
OZ
1939 ea->attrs[n].flags = 0;
1940 ea->attrs[n].type = EAF_TYPE_INT; /* FIXME: Some are EAF_TYPE_BITFIELD */
1941 ea->attrs[n].u.data = metrics[t];
1942 n++;
1943 }
1944
1945 if (n > 0)
1946 {
2feaa693 1947 ea->next = ra->eattrs;
9fdf9d29
OZ
1948 ea->flags = EALF_SORTED;
1949 ea->count = n;
2feaa693 1950 ra->eattrs = ea;
9fdf9d29
OZ
1951 }
1952 }
1953
2feaa693
OZ
1954 /*
1955 * Ideally, now we would send the received route to the rest of kernel code.
98bb80a2
OZ
1956 * But IPv6 ECMP routes before 4.11 are sent as a sequence of routes, so we
1957 * postpone it and merge next hops until the end of the sequence. Note that
3e792350
OZ
1958 * when doing merging of next hops, we expect the new route to be unipath.
1959 * Otherwise, we ignore additional next hops in nexthop_insert().
2feaa693
OZ
1960 */
1961
1962 if (!s->net)
1963 {
1964 /* Store the new route */
1965 s->net = net;
1966 s->attrs = ra;
1967 s->proto = p;
1968 s->new = new;
be17805c 1969 s->krt_src = krt_src;
2feaa693
OZ
1970 s->krt_type = i->rtm_type;
1971 s->krt_proto = i->rtm_protocol;
1972 s->krt_metric = priority;
1973 }
95616c82 1974 else
2feaa693
OZ
1975 {
1976 /* Merge next hops with the stored route */
62e64905 1977 rta *oa = s->attrs;
2feaa693 1978
62e64905
OZ
1979 struct nexthop *nhs = &oa->nh;
1980 nexthop_insert(&nhs, &ra->nh);
1981
1982 /* Perhaps new nexthop is inserted at the first position */
1983 if (nhs == &ra->nh)
1984 {
1985 /* Swap rtas */
1986 s->attrs = ra;
1987
1988 /* Keep old eattrs */
1989 ra->eattrs = oa->eattrs;
1990 }
2feaa693 1991 }
95616c82
OZ
1992}
1993
1994void
534d0a4b 1995krt_do_scan(struct krt_proto *p)
95616c82
OZ
1996{
1997 struct nlmsghdr *h;
2feaa693 1998 struct nl_parse_state s;
95616c82 1999
1187627a 2000 nl_parse_begin(&s, 1);
534d0a4b
OZ
2001
2002 /* Table-specific scan or shared scan */
2003 if (p)
2004 nl_request_dump_route(p->af, krt_table_id(p));
2005 else
2006 nl_request_dump_route(AF_UNSPEC, 0);
2007
95616c82 2008 while (h = nl_get_scan())
534d0a4b 2009 {
95616c82 2010 if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
2feaa693 2011 nl_parse_route(&s, h);
95616c82
OZ
2012 else
2013 log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
534d0a4b
OZ
2014 }
2015
cc5b93f7 2016 nl_parse_end(&s);
95616c82
OZ
2017}
2018
2019/*
2020 * Asynchronous Netlink interface
2021 */
2022
2023static sock *nl_async_sk; /* BIRD socket for asynchronous notifications */
2024static byte *nl_async_rx_buffer; /* Receive buffer */
81ee6cda
OZ
2025static uint nl_async_bufsize; /* Kernel rx buffer size for the netlink socket */
2026static struct config *nl_last_config; /* For tracking changes to nl_async_bufsize */
95616c82
OZ
2027
2028static void
2029nl_async_msg(struct nlmsghdr *h)
2030{
2feaa693
OZ
2031 struct nl_parse_state s;
2032
95616c82
OZ
2033 switch (h->nlmsg_type)
2034 {
2035 case RTM_NEWROUTE:
2036 case RTM_DELROUTE:
2037 DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);
1187627a 2038 nl_parse_begin(&s, 0);
2feaa693
OZ
2039 nl_parse_route(&s, h);
2040 nl_parse_end(&s);
95616c82
OZ
2041 break;
2042 case RTM_NEWLINK:
2043 case RTM_DELLINK:
2044 DBG("KRT: Received async link notification (%d)\n", h->nlmsg_type);
1e4891e4
OZ
2045 if (kif_proto)
2046 nl_parse_link(h, 0);
95616c82
OZ
2047 break;
2048 case RTM_NEWADDR:
2049 case RTM_DELADDR:
2050 DBG("KRT: Received async address notification (%d)\n", h->nlmsg_type);
1e4891e4
OZ
2051 if (kif_proto)
2052 nl_parse_addr(h, 0);
95616c82
OZ
2053 break;
2054 default:
2055 DBG("KRT: Received unknown async notification (%d)\n", h->nlmsg_type);
2056 }
2057}
2058
2059static int
3e236955 2060nl_async_hook(sock *sk, uint size UNUSED)
95616c82
OZ
2061{
2062 struct iovec iov = { nl_async_rx_buffer, NL_RX_SIZE };
2063 struct sockaddr_nl sa;
31e9e101
ST
2064 struct msghdr m = {
2065 .msg_name = &sa,
2066 .msg_namelen = sizeof(sa),
2067 .msg_iov = &iov,
2068 .msg_iovlen = 1,
2069 };
95616c82
OZ
2070 struct nlmsghdr *h;
2071 int x;
ae80a2de 2072 uint len;
95616c82
OZ
2073
2074 x = recvmsg(sk->fd, &m, 0);
2075 if (x < 0)
2076 {
2077 if (errno == ENOBUFS)
2078 {
2079 /*
2080 * Netlink reports some packets have been thrown away.
2081 * One day we might react to it by asking for route table
2082 * scan in near future.
2083 */
2c33da50 2084 log(L_WARN "Kernel dropped some netlink messages, will resync on next scan.");
95616c82
OZ
2085 return 1; /* More data are likely to be ready */
2086 }
2087 else if (errno != EWOULDBLOCK)
2088 log(L_ERR "Netlink recvmsg: %m");
2089 return 0;
2090 }
2091 if (sa.nl_pid) /* It isn't from the kernel */
2092 {
2093 DBG("Non-kernel packet\n");
2094 return 1;
2095 }
2096 h = (void *) nl_async_rx_buffer;
2097 len = x;
2098 if (m.msg_flags & MSG_TRUNC)
2099 {
2100 log(L_WARN "Netlink got truncated asynchronous message");
2101 return 1;
2102 }
2103 while (NLMSG_OK(h, len))
2104 {
2105 nl_async_msg(h);
2106 h = NLMSG_NEXT(h, len);
2107 }
2108 if (len)
2109 log(L_WARN "nl_async_hook: Found packet remnant of size %d", len);
2110 return 1;
2111}
2112
ccd2a3ed
JMM
2113static void
2114nl_async_err_hook(sock *sk, int e UNUSED)
2115{
2116 nl_async_hook(sk, 0);
2117}
2118
95616c82
OZ
2119static void
2120nl_open_async(void)
2121{
2122 sock *sk;
2123 struct sockaddr_nl sa;
2124 int fd;
95616c82 2125
f83ce94d 2126 if (nl_async_sk)
95616c82 2127 return;
95616c82
OZ
2128
2129 DBG("KRT: Opening async netlink socket\n");
2130
2131 fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
2132 if (fd < 0)
2133 {
2134 log(L_ERR "Unable to open asynchronous rtnetlink socket: %m");
2135 return;
2136 }
2137
2138 bzero(&sa, sizeof(sa));
2139 sa.nl_family = AF_NETLINK;
29a64162
OZ
2140 sa.nl_groups = RTMGRP_LINK |
2141 RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE |
2142 RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE;
2143
95616c82
OZ
2144 if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
2145 {
2146 log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m");
f83ce94d 2147 close(fd);
95616c82
OZ
2148 return;
2149 }
2150
f83ce94d
OZ
2151 nl_async_rx_buffer = xmalloc(NL_RX_SIZE);
2152
95616c82
OZ
2153 sk = nl_async_sk = sk_new(krt_pool);
2154 sk->type = SK_MAGIC;
2155 sk->rx_hook = nl_async_hook;
ccd2a3ed 2156 sk->err_hook = nl_async_err_hook;
95616c82 2157 sk->fd = fd;
05476c4d 2158 if (sk_open(sk) < 0)
95616c82 2159 bug("Netlink: sk_open failed");
95616c82
OZ
2160}
2161
81ee6cda
OZ
2162static void
2163nl_update_async_bufsize(void)
2164{
2165 /* No async socket */
2166 if (!nl_async_sk)
2167 return;
2168
2169 /* Already reconfigured */
2170 if (nl_last_config == config)
2171 return;
2172
2173 /* Update netlink buffer size */
2174 uint bufsize = nl_cfg_rx_buffer_size(config);
2175 if (bufsize && (bufsize != nl_async_bufsize))
2176 {
2177 /* Log message for reconfigurations only */
2178 if (nl_last_config)
2179 log(L_INFO "KRT: Changing netlink rx buffer size to %u", bufsize);
2180
2181 nl_set_rcvbuf(nl_async_sk->fd, bufsize);
2182 nl_async_bufsize = bufsize;
2183 }
2184
2185 nl_last_config = config;
2186}
2187
9ddbfbdd 2188
95616c82
OZ
2189/*
2190 * Interface to the UNIX krt module
2191 */
2192
95616c82 2193void
9ddbfbdd
JMM
2194krt_sys_io_init(void)
2195{
05d47bd5 2196 nl_linpool = lp_new_default(krt_pool);
9ddbfbdd
JMM
2197 HASH_INIT(nl_table_map, krt_pool, 6);
2198}
2199
2200int
c6964c30 2201krt_sys_start(struct krt_proto *p)
95616c82 2202{
29a64162 2203 struct krt_proto *old = HASH_FIND(nl_table_map, RTH, p->af, krt_table_id(p));
9ddbfbdd
JMM
2204
2205 if (old)
2206 {
2207 log(L_ERR "%s: Kernel table %u already registered by %s",
2208 p->p.name, krt_table_id(p), old->p.name);
2209 return 0;
2210 }
2211
2212 HASH_INSERT2(nl_table_map, RTH, krt_pool, p);
c6964c30
OZ
2213
2214 nl_open();
2215 nl_open_async();
81ee6cda 2216 nl_update_async_bufsize();
9ddbfbdd
JMM
2217
2218 return 1;
95616c82
OZ
2219}
2220
2221void
9ddbfbdd 2222krt_sys_shutdown(struct krt_proto *p)
95616c82 2223{
81ee6cda
OZ
2224 nl_update_async_bufsize();
2225
9ddbfbdd 2226 HASH_REMOVE2(nl_table_map, RTH, krt_pool, p);
95616c82
OZ
2227}
2228
2229int
2230krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
2231{
81ee6cda
OZ
2232 nl_update_async_bufsize();
2233
4adcb9df 2234 return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric);
95616c82
OZ
2235}
2236
95616c82
OZ
2237void
2238krt_sys_init_config(struct krt_config *cf)
2239{
2240 cf->sys.table_id = RT_TABLE_MAIN;
bff21441 2241 cf->sys.metric = 32;
95616c82
OZ
2242}
2243
2244void
2245krt_sys_copy_config(struct krt_config *d, struct krt_config *s)
2246{
2247 d->sys.table_id = s->sys.table_id;
4adcb9df 2248 d->sys.metric = s->sys.metric;
95616c82
OZ
2249}
2250
9fdf9d29
OZ
2251static const char *krt_metrics_names[KRT_METRICS_MAX] = {
2252 NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss",
2253 "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack"
2254};
2255
2256static const char *krt_features_names[KRT_FEATURES_MAX] = {
2257 "ecn", NULL, NULL, "allfrag"
2258};
2259
2260int
258be565 2261krt_sys_get_attr(const eattr *a, byte *buf, int buflen UNUSED)
9fdf9d29
OZ
2262{
2263 switch (a->id)
2264 {
2265 case EA_KRT_PREFSRC:
2266 bsprintf(buf, "prefsrc");
2267 return GA_NAME;
2268
2269 case EA_KRT_REALM:
2270 bsprintf(buf, "realm");
2271 return GA_NAME;
2272
6e75d0d2
OZ
2273 case EA_KRT_SCOPE:
2274 bsprintf(buf, "scope");
2275 return GA_NAME;
2276
9fdf9d29
OZ
2277 case EA_KRT_LOCK:
2278 buf += bsprintf(buf, "lock:");
2279 ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX);
2280 return GA_FULL;
2281
2282 case EA_KRT_FEATURES:
2283 buf += bsprintf(buf, "features:");
2284 ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX);
2285 return GA_FULL;
2286
2287 default:;
2288 int id = (int)EA_ID(a->id) - KRT_METRICS_OFFSET;
2289 if (id > 0 && id < KRT_METRICS_MAX)
2290 {
2291 bsprintf(buf, "%s", krt_metrics_names[id]);
2292 return GA_NAME;
2293 }
2294
2295 return GA_UNKNOWN;
2296 }
2297}
2298
95616c82
OZ
2299
2300
2301void
2302kif_sys_start(struct kif_proto *p UNUSED)
2303{
2304 nl_open();
2305 nl_open_async();
2306}
2307
2308void
2309kif_sys_shutdown(struct kif_proto *p UNUSED)
2310{
2311}
153f02da
OZ
2312
2313int
2314kif_update_sysdep_addr(struct iface *i UNUSED)
2315{
2316 return 0;
2317}