]> git.ipfire.org Git - thirdparty/bird.git/blame - sysdep/linux/netlink.c
Minor cleanups and fixes
[thirdparty/bird.git] / sysdep / linux / netlink.c
CommitLineData
95616c82
OZ
1/*
2 * BIRD -- Linux Netlink Interface
3 *
4 * (c) 1999--2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
a8caff32 9#include <alloca.h>
95616c82 10#include <stdio.h>
f83ce94d 11#include <unistd.h>
95616c82
OZ
12#include <fcntl.h>
13#include <sys/socket.h>
14#include <sys/uio.h>
15#include <errno.h>
16
17#undef LOCAL_DEBUG
18
19#include "nest/bird.h"
20#include "nest/route.h"
21#include "nest/protocol.h"
22#include "nest/iface.h"
4e276a89 23#include "lib/alloca.h"
7152e5ef
JMM
24#include "sysdep/unix/timer.h"
25#include "sysdep/unix/unix.h"
26#include "sysdep/unix/krt.h"
95616c82
OZ
27#include "lib/socket.h"
28#include "lib/string.h"
9ddbfbdd 29#include "lib/hash.h"
95616c82
OZ
30#include "conf/conf.h"
31
32#include <asm/types.h>
33#include <linux/if.h>
54635f43 34#ifdef HAVE_LWTUNNEL
d14f8c3c 35#include <linux/lwtunnel.h>
54635f43
JMM
36#else
37#include "sysdep/linux/lwtunnel.h"
38#endif
95616c82
OZ
39#include <linux/netlink.h>
40#include <linux/rtnetlink.h>
41
9ddbfbdd 42
95616c82
OZ
43#ifndef MSG_TRUNC /* Hack: Several versions of glibc miss this one :( */
44#define MSG_TRUNC 0x20
45#endif
46
a08a81c6
OZ
47#ifndef IFA_FLAGS
48#define IFA_FLAGS 8
49#endif
50
95616c82
OZ
51#ifndef IFF_LOWER_UP
52#define IFF_LOWER_UP 0x10000
53#endif
54
9ddbfbdd
JMM
55#ifndef RTA_TABLE
56#define RTA_TABLE 15
57#endif
58
d14f8c3c
JMM
59#ifndef RTA_VIA
60#define RTA_VIA 18
61#endif
62
97e48b6a
JMM
63#ifndef HAVE_STRUCT_RTVIA
64struct rtvia {
734e9fb8
OZ
65 unsigned short rtvia_family;
66 u8 rtvia_addr[0];
97e48b6a
JMM
67};
68#endif
69
d14f8c3c
JMM
70#ifndef RTA_NEWDST
71#define RTA_NEWDST 19
72#endif
73
74#ifndef RTA_ENCAP_TYPE
75#define RTA_ENCAP_TYPE 21
76#endif
77
78#ifndef RTA_ENCAP
79#define RTA_ENCAP 22
80#endif
9ddbfbdd 81
cc5b93f7 82#define krt_ecmp6(p) ((p)->af == AF_INET6)
2feaa693
OZ
83
84/*
85 * Structure nl_parse_state keeps state of received route processing. Ideally,
86 * we could just independently parse received Netlink messages and immediately
87 * propagate received routes to the rest of BIRD, but Linux kernel represents
88 * and announces IPv6 ECMP routes not as one route with multiple next hops (like
89 * RTA_MULTIPATH in IPv4 ECMP), but as a set of routes with the same prefix.
90 *
91 * Therefore, BIRD keeps currently processed route in nl_parse_state structure
92 * and postpones its propagation until we expect it to be final; i.e., when
93 * non-matching route is received or when the scan ends. When another matching
94 * route is received, it is merged with the already processed route to form an
95 * ECMP route. Note that merging is done only for IPv6 (merge == 1), but the
96 * postponing is done in both cases (for simplicity). All IPv4 routes are just
97 * considered non-matching.
98 *
99 * This is ignored for asynchronous notifications (every notification is handled
100 * as a separate route). It is not an issue for our routes, as we ignore such
101 * notifications anyways. But importing alien IPv6 ECMP routes does not work
102 * properly.
103 */
104
105struct nl_parse_state
106{
107 struct linpool *pool;
108 int scan;
109 int merge;
110
111 net *net;
112 rta *attrs;
113 struct krt_proto *proto;
114 s8 new;
115 s8 krt_src;
116 u8 krt_type;
117 u8 krt_proto;
118 u32 krt_metric;
119};
120
95616c82
OZ
121/*
122 * Synchronous Netlink interface
123 */
124
125struct nl_sock
126{
127 int fd;
128 u32 seq;
129 byte *rx_buffer; /* Receive buffer */
130 struct nlmsghdr *last_hdr; /* Recently received packet */
ae80a2de 131 uint last_size;
95616c82
OZ
132};
133
134#define NL_RX_SIZE 8192
135
2feaa693
OZ
136#define NL_OP_DELETE 0
137#define NL_OP_ADD (NLM_F_CREATE|NLM_F_EXCL)
138#define NL_OP_REPLACE (NLM_F_CREATE|NLM_F_REPLACE)
139#define NL_OP_APPEND (NLM_F_CREATE|NLM_F_APPEND)
140
141static linpool *nl_linpool;
142
95616c82
OZ
143static struct nl_sock nl_scan = {.fd = -1}; /* Netlink socket for synchronous scan */
144static struct nl_sock nl_req = {.fd = -1}; /* Netlink socket for requests */
145
146static void
147nl_open_sock(struct nl_sock *nl)
148{
149 if (nl->fd < 0)
150 {
151 nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
152 if (nl->fd < 0)
153 die("Unable to open rtnetlink socket: %m");
154 nl->seq = now;
155 nl->rx_buffer = xmalloc(NL_RX_SIZE);
156 nl->last_hdr = NULL;
157 nl->last_size = 0;
158 }
159}
160
161static void
162nl_open(void)
163{
164 nl_open_sock(&nl_scan);
165 nl_open_sock(&nl_req);
166}
167
168static void
169nl_send(struct nl_sock *nl, struct nlmsghdr *nh)
170{
171 struct sockaddr_nl sa;
172
173 memset(&sa, 0, sizeof(sa));
174 sa.nl_family = AF_NETLINK;
175 nh->nlmsg_pid = 0;
176 nh->nlmsg_seq = ++(nl->seq);
177 if (sendto(nl->fd, nh, nh->nlmsg_len, 0, (struct sockaddr *)&sa, sizeof(sa)) < 0)
178 die("rtnetlink sendto: %m");
179 nl->last_hdr = NULL;
180}
181
182static void
86c3eea0 183nl_request_dump(int af, int cmd)
95616c82
OZ
184{
185 struct {
186 struct nlmsghdr nh;
187 struct rtgenmsg g;
641172c6
OZ
188 } req = {
189 .nh.nlmsg_type = cmd,
190 .nh.nlmsg_len = sizeof(req),
191 .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
192 .g.rtgen_family = af
193 };
95616c82
OZ
194 nl_send(&nl_scan, &req.nh);
195}
196
197static struct nlmsghdr *
198nl_get_reply(struct nl_sock *nl)
199{
200 for(;;)
201 {
202 if (!nl->last_hdr)
203 {
204 struct iovec iov = { nl->rx_buffer, NL_RX_SIZE };
205 struct sockaddr_nl sa;
31e9e101
ST
206 struct msghdr m = {
207 .msg_name = &sa,
208 .msg_namelen = sizeof(sa),
209 .msg_iov = &iov,
210 .msg_iovlen = 1,
211 };
95616c82
OZ
212 int x = recvmsg(nl->fd, &m, 0);
213 if (x < 0)
214 die("nl_get_reply: %m");
215 if (sa.nl_pid) /* It isn't from the kernel */
216 {
217 DBG("Non-kernel packet\n");
218 continue;
219 }
220 nl->last_size = x;
221 nl->last_hdr = (void *) nl->rx_buffer;
222 if (m.msg_flags & MSG_TRUNC)
223 bug("nl_get_reply: got truncated reply which should be impossible");
224 }
225 if (NLMSG_OK(nl->last_hdr, nl->last_size))
226 {
227 struct nlmsghdr *h = nl->last_hdr;
228 nl->last_hdr = NLMSG_NEXT(h, nl->last_size);
229 if (h->nlmsg_seq != nl->seq)
230 {
231 log(L_WARN "nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)",
232 h->nlmsg_seq, nl->seq);
233 continue;
234 }
235 return h;
236 }
237 if (nl->last_size)
238 log(L_WARN "nl_get_reply: Found packet remnant of size %d", nl->last_size);
239 nl->last_hdr = NULL;
240 }
241}
242
1123e707 243static struct tbf rl_netlink_err = TBF_DEFAULT_LOG_LIMITS;
95616c82
OZ
244
245static int
2feaa693 246nl_error(struct nlmsghdr *h, int ignore_esrch)
95616c82
OZ
247{
248 struct nlmsgerr *e;
249 int ec;
250
251 if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr)))
252 {
253 log(L_WARN "Netlink: Truncated error message received");
254 return ENOBUFS;
255 }
256 e = (struct nlmsgerr *) NLMSG_DATA(h);
257 ec = -e->error;
2feaa693 258 if (ec && !(ignore_esrch && (ec == ESRCH)))
95616c82
OZ
259 log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec));
260 return ec;
261}
262
263static struct nlmsghdr *
264nl_get_scan(void)
265{
266 struct nlmsghdr *h = nl_get_reply(&nl_scan);
267
268 if (h->nlmsg_type == NLMSG_DONE)
269 return NULL;
270 if (h->nlmsg_type == NLMSG_ERROR)
271 {
2feaa693 272 nl_error(h, 0);
95616c82
OZ
273 return NULL;
274 }
275 return h;
276}
277
278static int
2feaa693 279nl_exchange(struct nlmsghdr *pkt, int ignore_esrch)
95616c82
OZ
280{
281 struct nlmsghdr *h;
282
283 nl_send(&nl_req, pkt);
284 for(;;)
285 {
286 h = nl_get_reply(&nl_req);
287 if (h->nlmsg_type == NLMSG_ERROR)
288 break;
289 log(L_WARN "nl_exchange: Unexpected reply received");
290 }
2feaa693 291 return nl_error(h, ignore_esrch) ? -1 : 0;
95616c82
OZ
292}
293
294/*
295 * Netlink attributes
296 */
297
298static int nl_attr_len;
299
300static void *
301nl_checkin(struct nlmsghdr *h, int lsize)
302{
303 nl_attr_len = h->nlmsg_len - NLMSG_LENGTH(lsize);
304 if (nl_attr_len < 0)
305 {
306 log(L_ERR "nl_checkin: underrun by %d bytes", -nl_attr_len);
307 return NULL;
308 }
309 return NLMSG_DATA(h);
310}
311
ad276157
JMM
312struct nl_want_attrs {
313 u8 defined:1;
314 u8 checksize:1;
315 u8 size;
316};
317
318
319#define BIRD_IFLA_MAX (IFLA_WIRELESS+1)
320
321static struct nl_want_attrs ifla_attr_want[BIRD_IFLA_MAX] = {
322 [IFLA_IFNAME] = { 1, 0, 0 },
323 [IFLA_MTU] = { 1, 1, sizeof(u32) },
324 [IFLA_WIRELESS] = { 1, 0, 0 },
325};
326
29a64162 327
e37d2e3e 328#define BIRD_IFA_MAX (IFA_FLAGS+1)
ad276157 329
ad276157
JMM
330static struct nl_want_attrs ifa_attr_want4[BIRD_IFA_MAX] = {
331 [IFA_ADDRESS] = { 1, 1, sizeof(ip4_addr) },
332 [IFA_LOCAL] = { 1, 1, sizeof(ip4_addr) },
333 [IFA_BROADCAST] = { 1, 1, sizeof(ip4_addr) },
62e64905 334 [IFA_FLAGS] = { 1, 1, sizeof(u32) },
ad276157 335};
29a64162 336
ad276157
JMM
337static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = {
338 [IFA_ADDRESS] = { 1, 1, sizeof(ip6_addr) },
339 [IFA_LOCAL] = { 1, 1, sizeof(ip6_addr) },
e37d2e3e 340 [IFA_FLAGS] = { 1, 1, sizeof(u32) },
ad276157 341};
29a64162 342
ad276157 343
d14f8c3c 344#define BIRD_RTA_MAX (RTA_ENCAP+1)
ad276157 345
4e276a89 346static struct nl_want_attrs nexthop_attr_want4[BIRD_RTA_MAX] = {
ad276157 347 [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
d14f8c3c
JMM
348 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
349 [RTA_ENCAP] = { 1, 0, 0 },
350};
351
352static struct nl_want_attrs encap_mpls_want[BIRD_RTA_MAX] = {
353 [RTA_DST] = { 1, 0, 0 },
ad276157
JMM
354};
355
ad276157
JMM
356static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = {
357 [RTA_DST] = { 1, 1, sizeof(ip4_addr) },
358 [RTA_OIF] = { 1, 1, sizeof(u32) },
359 [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
360 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
361 [RTA_PREFSRC] = { 1, 1, sizeof(ip4_addr) },
362 [RTA_METRICS] = { 1, 0, 0 },
363 [RTA_MULTIPATH] = { 1, 0, 0 },
364 [RTA_FLOW] = { 1, 1, sizeof(u32) },
365 [RTA_TABLE] = { 1, 1, sizeof(u32) },
d14f8c3c
JMM
366 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
367 [RTA_ENCAP] = { 1, 0, 0 },
ad276157 368};
29a64162 369
ad276157
JMM
370static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = {
371 [RTA_DST] = { 1, 1, sizeof(ip6_addr) },
372 [RTA_IIF] = { 1, 1, sizeof(u32) },
373 [RTA_OIF] = { 1, 1, sizeof(u32) },
374 [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) },
375 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
376 [RTA_PREFSRC] = { 1, 1, sizeof(ip6_addr) },
377 [RTA_METRICS] = { 1, 0, 0 },
378 [RTA_FLOW] = { 1, 1, sizeof(u32) },
379 [RTA_TABLE] = { 1, 1, sizeof(u32) },
d14f8c3c
JMM
380 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
381 [RTA_ENCAP] = { 1, 0, 0 },
382};
383
384static struct nl_want_attrs rtm_attr_want_mpls[BIRD_RTA_MAX] = {
385 [RTA_DST] = { 1, 1, sizeof(u32) },
386 [RTA_IIF] = { 1, 1, sizeof(u32) },
387 [RTA_OIF] = { 1, 1, sizeof(u32) },
388 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
389 [RTA_METRICS] = { 1, 0, 0 },
390 [RTA_FLOW] = { 1, 1, sizeof(u32) },
391 [RTA_TABLE] = { 1, 1, sizeof(u32) },
392 [RTA_VIA] = { 1, 0, 0 },
393 [RTA_NEWDST] = { 1, 0, 0 },
ad276157 394};
ad276157
JMM
395
396
95616c82 397static int
ad276157 398nl_parse_attrs(struct rtattr *a, struct nl_want_attrs *want, struct rtattr **k, int ksize)
95616c82
OZ
399{
400 int max = ksize / sizeof(struct rtattr *);
401 bzero(k, ksize);
ad276157
JMM
402
403 for ( ; RTA_OK(a, nl_attr_len); a = RTA_NEXT(a, nl_attr_len))
95616c82 404 {
ad276157
JMM
405 if ((a->rta_type >= max) || !want[a->rta_type].defined)
406 continue;
407
408 if (want[a->rta_type].checksize && (RTA_PAYLOAD(a) != want[a->rta_type].size))
409 {
9b136840 410 log(L_ERR "nl_parse_attrs: Malformed attribute received");
ad276157
JMM
411 return 0;
412 }
413
414 k[a->rta_type] = a;
95616c82 415 }
ad276157 416
95616c82
OZ
417 if (nl_attr_len)
418 {
419 log(L_ERR "nl_parse_attrs: remnant of size %d", nl_attr_len);
420 return 0;
421 }
ad276157
JMM
422
423 return 1;
95616c82
OZ
424}
425
d14f8c3c
JMM
426static inline u16 rta_get_u16(struct rtattr *a)
427{ return *(u16 *) RTA_DATA(a); }
428
fce764f9 429static inline u32 rta_get_u32(struct rtattr *a)
acb04cfd
OZ
430{ return *(u32 *) RTA_DATA(a); }
431
432static inline ip4_addr rta_get_ip4(struct rtattr *a)
433{ return ip4_ntoh(*(ip4_addr *) RTA_DATA(a)); }
434
435static inline ip6_addr rta_get_ip6(struct rtattr *a)
436{ return ip6_ntoh(*(ip6_addr *) RTA_DATA(a)); }
437
9b136840
JMM
438static inline ip_addr rta_get_ipa(struct rtattr *a)
439{
440 if (RTA_PAYLOAD(a) == sizeof(ip4_addr))
441 return ipa_from_ip4(rta_get_ip4(a));
442 else
443 return ipa_from_ip6(rta_get_ip6(a));
444}
acb04cfd 445
d14f8c3c
JMM
446static inline ip_addr rta_get_via(struct rtattr *a)
447{
448 struct rtvia *v = RTA_DATA(a);
449 switch(v->rtvia_family) {
450 case AF_INET: return ipa_from_ip4(ip4_ntoh(*(ip4_addr *) v->rtvia_addr));
451 case AF_INET6: return ipa_from_ip6(ip6_ntoh(*(ip6_addr *) v->rtvia_addr));
452 }
453 return IPA_NONE;
454}
455
456static u32 rta_mpls_stack[MPLS_MAX_LABEL_STACK];
457static inline int rta_get_mpls(struct rtattr *a, u32 *stack)
458{
459 if (RTA_PAYLOAD(a) % 4)
460 log(L_WARN "KRT: Strange length of received MPLS stack: %u", RTA_PAYLOAD(a));
461
462 return mpls_get(RTA_DATA(a), RTA_PAYLOAD(a) & ~0x3, stack);
463}
464
9fdf9d29
OZ
465struct rtattr *
466nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint dlen)
95616c82 467{
9fdf9d29
OZ
468 uint pos = NLMSG_ALIGN(h->nlmsg_len);
469 uint len = RTA_LENGTH(dlen);
95616c82
OZ
470
471 if (pos + len > bufsize)
472 bug("nl_add_attr: packet buffer overflow");
9fdf9d29
OZ
473
474 struct rtattr *a = (struct rtattr *)((char *)h + pos);
95616c82
OZ
475 a->rta_type = code;
476 a->rta_len = len;
477 h->nlmsg_len = pos + len;
9fdf9d29
OZ
478
479 if (dlen > 0)
480 memcpy(RTA_DATA(a), data, dlen);
481
482 return a;
95616c82
OZ
483}
484
d14f8c3c
JMM
485static inline struct rtattr *
486nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code)
487{
488 return nl_add_attr(h, bufsize, code, NULL, 0);
489}
490
491static inline void
492nl_close_attr(struct nlmsghdr *h, struct rtattr *a)
493{
494 a->rta_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)a;
495}
496
497static inline void
498nl_add_attr_u16(struct nlmsghdr *h, uint bufsize, int code, u16 data)
499{
500 nl_add_attr(h, bufsize, code, &data, 2);
501}
502
95616c82 503static inline void
29a64162 504nl_add_attr_u32(struct nlmsghdr *h, uint bufsize, int code, u32 data)
95616c82
OZ
505{
506 nl_add_attr(h, bufsize, code, &data, 4);
507}
508
509static inline void
29a64162 510nl_add_attr_ip4(struct nlmsghdr *h, uint bufsize, int code, ip4_addr ip4)
95616c82 511{
29a64162
OZ
512 ip4 = ip4_hton(ip4);
513 nl_add_attr(h, bufsize, code, &ip4, sizeof(ip4));
514}
515
516static inline void
517nl_add_attr_ip6(struct nlmsghdr *h, uint bufsize, int code, ip6_addr ip6)
518{
519 ip6 = ip6_hton(ip6);
520 nl_add_attr(h, bufsize, code, &ip6, sizeof(ip6));
521}
522
523static inline void
524nl_add_attr_ipa(struct nlmsghdr *h, uint bufsize, int code, ip_addr ipa)
525{
526 if (ipa_is_ip4(ipa))
527 nl_add_attr_ip4(h, bufsize, code, ipa_to_ip4(ipa));
9b136840 528 else
29a64162 529 nl_add_attr_ip6(h, bufsize, code, ipa_to_ip6(ipa));
95616c82
OZ
530}
531
d14f8c3c
JMM
532static inline void
533nl_add_attr_mpls(struct nlmsghdr *h, uint bufsize, int code, int len, u32 *stack)
9fdf9d29 534{
d14f8c3c
JMM
535 char buf[len*4];
536 mpls_put(buf, len, stack);
537 nl_add_attr(h, bufsize, code, buf, len*4);
9fdf9d29 538}
95616c82
OZ
539
540static inline void
d14f8c3c 541nl_add_attr_mpls_encap(struct nlmsghdr *h, uint bufsize, int len, u32 *stack)
95616c82 542{
d14f8c3c
JMM
543 nl_add_attr_u16(h, bufsize, RTA_ENCAP_TYPE, LWTUNNEL_ENCAP_MPLS);
544
545 struct rtattr *nest = nl_open_attr(h, bufsize, RTA_ENCAP);
546 nl_add_attr_mpls(h, bufsize, RTA_DST, len, stack);
547 nl_close_attr(h, nest);
548}
549
550static inline void
551nl_add_attr_via(struct nlmsghdr *h, uint bufsize, ip_addr ipa)
552{
553 struct rtattr *nest = nl_open_attr(h, bufsize, RTA_VIA);
554 struct rtvia *via = RTA_DATA(nest);
555
556 h->nlmsg_len += sizeof(*via);
557
62e64905
OZ
558 if (ipa_is_ip4(ipa))
559 {
d14f8c3c 560 via->rtvia_family = AF_INET;
62e64905
OZ
561 put_ip4(via->rtvia_addr, ipa_to_ip4(ipa));
562 h->nlmsg_len += sizeof(ip4_addr);
563 }
564 else
565 {
d14f8c3c 566 via->rtvia_family = AF_INET6;
62e64905
OZ
567 put_ip6(via->rtvia_addr, ipa_to_ip6(ipa));
568 h->nlmsg_len += sizeof(ip6_addr);
d14f8c3c
JMM
569 }
570
571 nl_close_attr(h, nest);
95616c82
OZ
572}
573
9fdf9d29
OZ
574static inline struct rtnexthop *
575nl_open_nexthop(struct nlmsghdr *h, uint bufsize)
576{
577 uint pos = NLMSG_ALIGN(h->nlmsg_len);
578 uint len = RTNH_LENGTH(0);
579
580 if (pos + len > bufsize)
581 bug("nl_open_nexthop: packet buffer overflow");
582
583 h->nlmsg_len = pos + len;
584
585 return (void *)h + pos;
586}
587
588static inline void
589nl_close_nexthop(struct nlmsghdr *h, struct rtnexthop *nh)
590{
591 nh->rtnh_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)nh;
592}
95616c82 593
d14f8c3c
JMM
594static inline void
595nl_add_nexthop(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af)
596{
597 if (nh->labels > 0)
598 if (af == AF_MPLS)
599 nl_add_attr_mpls(h, bufsize, RTA_NEWDST, nh->labels, nh->label);
600 else
601 nl_add_attr_mpls_encap(h, bufsize, nh->labels, nh->label);
602
603 if (ipa_nonzero(nh->gw))
604 if (af == AF_MPLS)
605 nl_add_attr_via(h, bufsize, nh->gw);
606 else
607 nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
608}
609
95616c82 610static void
d14f8c3c 611nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af)
95616c82 612{
9fdf9d29
OZ
613 struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH);
614
95616c82 615 for (; nh; nh = nh->next)
9fdf9d29
OZ
616 {
617 struct rtnexthop *rtnh = nl_open_nexthop(h, bufsize);
95616c82 618
9fdf9d29
OZ
619 rtnh->rtnh_flags = 0;
620 rtnh->rtnh_hops = nh->weight;
621 rtnh->rtnh_ifindex = nh->iface->index;
95616c82 622
d14f8c3c 623 nl_add_nexthop(h, bufsize, nh, af);
95616c82 624
9fdf9d29
OZ
625 nl_close_nexthop(h, rtnh);
626 }
627
628 nl_close_attr(h, a);
629}
95616c82 630
4e276a89 631static struct nexthop *
95616c82
OZ
632nl_parse_multipath(struct krt_proto *p, struct rtattr *ra)
633{
634 /* Temporary buffer for multicast nexthops */
4e276a89 635 static struct nexthop *nh_buffer;
95616c82
OZ
636 static int nh_buf_size; /* in number of structures */
637 static int nh_buf_used;
638
ad276157 639 struct rtattr *a[BIRD_RTA_MAX];
95616c82 640 struct rtnexthop *nh = RTA_DATA(ra);
4e276a89 641 struct nexthop *rv, *first, **last;
3e236955 642 unsigned len = RTA_PAYLOAD(ra);
95616c82
OZ
643
644 first = NULL;
645 last = &first;
646 nh_buf_used = 0;
647
648 while (len)
649 {
650 /* Use RTNH_OK(nh,len) ?? */
651 if ((len < sizeof(*nh)) || (len < nh->rtnh_len))
652 return NULL;
653
654 if (nh_buf_used == nh_buf_size)
655 {
656 nh_buf_size = nh_buf_size ? (nh_buf_size * 2) : 4;
d14f8c3c 657 nh_buffer = xrealloc(nh_buffer, nh_buf_size * NEXTHOP_MAX_SIZE);
95616c82
OZ
658 }
659 *last = rv = nh_buffer + nh_buf_used++;
660 rv->next = NULL;
661 last = &(rv->next);
662
663 rv->weight = nh->rtnh_hops;
664 rv->iface = if_find_by_index(nh->rtnh_ifindex);
665 if (!rv->iface)
666 return NULL;
667
668 /* Nonexistent RTNH_PAYLOAD ?? */
669 nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0);
4e276a89 670 nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want4, a, sizeof(a));
95616c82
OZ
671 if (a[RTA_GATEWAY])
672 {
23c212e7 673 rv->gw = rta_get_ipa(a[RTA_GATEWAY]);
95616c82 674
23c212e7
OZ
675 neighbor *nbr;
676 nbr = neigh_find2(&p->p, &rv->gw, rv->iface,
677 (nh->rtnh_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0);
678 if (!nbr || (nbr->scope == SCOPE_HOST))
95616c82
OZ
679 return NULL;
680 }
681 else
d14f8c3c 682 rv->gw = IPA_NONE;
62e64905 683
d14f8c3c
JMM
684 if (a[RTA_ENCAP_TYPE])
685 {
686 if (rta_get_u16(a[RTA_ENCAP_TYPE]) != LWTUNNEL_ENCAP_MPLS) {
687 log(L_WARN "KRT: Unknown encapsulation method %d in multipath", rta_get_u16(a[RTA_ENCAP_TYPE]));
688 return NULL;
689 }
690
691 struct rtattr *enca[BIRD_RTA_MAX];
692 nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
693 nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
694 rv->labels = rta_get_mpls(enca[RTA_DST], rv->label);
695 break;
696 }
697
95616c82
OZ
698
699 len -= NLMSG_ALIGN(nh->rtnh_len);
700 nh = RTNH_NEXT(nh);
701 }
702
703 return first;
704}
705
9fdf9d29
OZ
706static void
707nl_add_metrics(struct nlmsghdr *h, uint bufsize, u32 *metrics, int max)
708{
709 struct rtattr *a = nl_open_attr(h, bufsize, RTA_METRICS);
710 int t;
711
712 for (t = 1; t < max; t++)
713 if (metrics[0] & (1 << t))
714 nl_add_attr_u32(h, bufsize, t, metrics[t]);
715
716 nl_close_attr(h, a);
717}
718
719static int
720nl_parse_metrics(struct rtattr *hdr, u32 *metrics, int max)
721{
722 struct rtattr *a = RTA_DATA(hdr);
723 int len = RTA_PAYLOAD(hdr);
724
725 metrics[0] = 0;
726 for (; RTA_OK(a, len); a = RTA_NEXT(a, len))
727 {
728 if (a->rta_type == RTA_UNSPEC)
729 continue;
730
731 if (a->rta_type >= max)
732 continue;
733
734 if (RTA_PAYLOAD(a) != 4)
735 return -1;
736
737 metrics[0] |= 1 << a->rta_type;
acb04cfd 738 metrics[a->rta_type] = rta_get_u32(a);
9fdf9d29
OZ
739 }
740
741 if (len > 0)
742 return -1;
743
744 return 0;
745}
746
95616c82
OZ
747
748/*
749 * Scanning of interfaces
750 */
751
752static void
753nl_parse_link(struct nlmsghdr *h, int scan)
754{
755 struct ifinfomsg *i;
ad276157 756 struct rtattr *a[BIRD_IFLA_MAX];
95616c82
OZ
757 int new = h->nlmsg_type == RTM_NEWLINK;
758 struct iface f = {};
759 struct iface *ifi;
760 char *name;
761 u32 mtu;
ae80a2de 762 uint fl;
95616c82 763
ad276157 764 if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFLA_RTA(i), ifla_attr_want, a, sizeof(a)))
95616c82 765 return;
ad276157 766 if (!a[IFLA_IFNAME] || (RTA_PAYLOAD(a[IFLA_IFNAME]) < 2) || !a[IFLA_MTU])
95616c82 767 {
ad276157
JMM
768 /*
769 * IFLA_IFNAME and IFLA_MTU are required, in fact, but there may also come
770 * a message with IFLA_WIRELESS set, where (e.g.) no IFLA_IFNAME exists.
771 * We simply ignore all such messages with IFLA_WIRELESS without notice.
772 */
773
774 if (a[IFLA_WIRELESS])
775 return;
776
777 log(L_ERR "KIF: Malformed message received");
95616c82
OZ
778 return;
779 }
ad276157 780
95616c82 781 name = RTA_DATA(a[IFLA_IFNAME]);
acb04cfd 782 mtu = rta_get_u32(a[IFLA_MTU]);
95616c82
OZ
783
784 ifi = if_find_by_index(i->ifi_index);
785 if (!new)
786 {
787 DBG("KIF: IF%d(%s) goes down\n", i->ifi_index, name);
788 if (!ifi)
789 return;
790
791 if_delete(ifi);
792 }
793 else
794 {
795 DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i->ifi_index, name, mtu, i->ifi_flags);
796 if (ifi && strncmp(ifi->name, name, sizeof(ifi->name)-1))
797 if_delete(ifi);
798
799 strncpy(f.name, name, sizeof(f.name)-1);
800 f.index = i->ifi_index;
801 f.mtu = mtu;
802
803 fl = i->ifi_flags;
804 if (fl & IFF_UP)
805 f.flags |= IF_ADMIN_UP;
806 if (fl & IFF_LOWER_UP)
807 f.flags |= IF_LINK_UP;
808 if (fl & IFF_LOOPBACK) /* Loopback */
809 f.flags |= IF_MULTIACCESS | IF_LOOPBACK | IF_IGNORE;
810 else if (fl & IFF_POINTOPOINT) /* PtP */
811 f.flags |= IF_MULTICAST;
812 else if (fl & IFF_BROADCAST) /* Broadcast */
813 f.flags |= IF_MULTIACCESS | IF_BROADCAST | IF_MULTICAST;
814 else
815 f.flags |= IF_MULTIACCESS; /* NBMA */
3216eb03 816
16a3254c
OZ
817 if (fl & IFF_MULTICAST)
818 f.flags |= IF_MULTICAST;
819
3216eb03
OZ
820 ifi = if_update(&f);
821
822 if (!scan)
823 if_end_partial_update(ifi);
95616c82
OZ
824 }
825}
826
827static void
9b136840 828nl_parse_addr4(struct ifaddrmsg *i, int scan, int new)
95616c82 829{
ad276157 830 struct rtattr *a[BIRD_IFA_MAX];
95616c82 831 struct iface *ifi;
e37d2e3e 832 u32 ifa_flags;
95616c82
OZ
833 int scope;
834
9b136840 835 if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a)))
95616c82 836 return;
ad276157 837
9b136840 838 if (!a[IFA_LOCAL])
ad276157 839 {
9b136840
JMM
840 log(L_ERR "KIF: Malformed message received (missing IFA_LOCAL)");
841 return;
ad276157 842 }
ad276157 843 if (!a[IFA_ADDRESS])
95616c82 844 {
ad276157 845 log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
95616c82
OZ
846 return;
847 }
848
849 ifi = if_find_by_index(i->ifa_index);
850 if (!ifi)
851 {
852 log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
853 return;
854 }
855
e37d2e3e
OZ
856 if (a[IFA_FLAGS])
857 ifa_flags = rta_get_u32(a[IFA_FLAGS]);
858 else
859 ifa_flags = i->ifa_flags;
860
9b136840 861 struct ifa ifa;
95616c82
OZ
862 bzero(&ifa, sizeof(ifa));
863 ifa.iface = ifi;
cc5b93f7 864 if (ifa_flags & IFA_F_SECONDARY)
95616c82
OZ
865 ifa.flags |= IA_SECONDARY;
866
9b136840
JMM
867 ifa.ip = rta_get_ipa(a[IFA_LOCAL]);
868
d7661fbe 869 if (i->ifa_prefixlen > IP4_MAX_PREFIX_LENGTH)
95616c82
OZ
870 {
871 log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
872 new = 0;
873 }
d7661fbe 874 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH)
95616c82 875 {
9b136840
JMM
876 ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
877 net_fill_ip4(&ifa.prefix, rta_get_ip4(a[IFA_ADDRESS]), i->ifa_prefixlen);
95616c82
OZ
878
879 /* It is either a host address or a peer address */
9b136840 880 if (ipa_equal(ifa.ip, ifa.brd))
95616c82
OZ
881 ifa.flags |= IA_HOST;
882 else
883 {
884 ifa.flags |= IA_PEER;
9b136840 885 ifa.opposite = ifa.brd;
95616c82
OZ
886 }
887 }
888 else
889 {
9b136840
JMM
890 net_fill_ip4(&ifa.prefix, ipa_to_ip4(ifa.ip), i->ifa_prefixlen);
891 net_normalize(&ifa.prefix);
892
d7661fbe 893 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 1)
95616c82
OZ
894 ifa.opposite = ipa_opposite_m1(ifa.ip);
895
d7661fbe 896 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 2)
95616c82
OZ
897 ifa.opposite = ipa_opposite_m2(ifa.ip);
898
899 if ((ifi->flags & IF_BROADCAST) && a[IFA_BROADCAST])
900 {
9b136840
JMM
901 ip4_addr xbrd = rta_get_ip4(a[IFA_BROADCAST]);
902 ip4_addr ybrd = ip4_or(ipa_to_ip4(ifa.ip), ip4_not(ip4_mkmask(i->ifa_prefixlen)));
903
904 if (ip4_equal(xbrd, net4_prefix(&ifa.prefix)) || ip4_equal(xbrd, ybrd))
905 ifa.brd = ipa_from_ip4(xbrd);
95616c82 906 else if (ifi->flags & IF_TMP_DOWN) /* Complain only during the first scan */
9b136840 907 {
e691d16a 908 log(L_ERR "KIF: Invalid broadcast address %I4 for %s", xbrd, ifi->name);
9b136840
JMM
909 ifa.brd = ipa_from_ip4(ybrd);
910 }
911 }
912 }
913
914 scope = ipa_classify(ifa.ip);
915 if (scope < 0)
916 {
917 log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
918 return;
919 }
920 ifa.scope = scope & IADDR_SCOPE_MASK;
921
922 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
923 ifi->index, ifi->name,
924 new ? "added" : "removed",
925 ifa.ip, ifa.flags, ifa.prefix, ifa.brd, ifa.opposite);
926
927 if (new)
928 ifa_update(&ifa);
929 else
930 ifa_delete(&ifa);
931
932 if (!scan)
933 if_end_partial_update(ifi);
934}
935
936static void
937nl_parse_addr6(struct ifaddrmsg *i, int scan, int new)
938{
939 struct rtattr *a[BIRD_IFA_MAX];
940 struct iface *ifi;
cc5b93f7 941 u32 ifa_flags;
9b136840
JMM
942 int scope;
943
944 if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a)))
945 return;
946
947 if (!a[IFA_ADDRESS])
948 {
949 log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
950 return;
951 }
952
953 ifi = if_find_by_index(i->ifa_index);
954 if (!ifi)
955 {
956 log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
957 return;
958 }
959
cc5b93f7
OZ
960 if (a[IFA_FLAGS])
961 ifa_flags = rta_get_u32(a[IFA_FLAGS]);
962 else
963 ifa_flags = i->ifa_flags;
964
9b136840
JMM
965 struct ifa ifa;
966 bzero(&ifa, sizeof(ifa));
967 ifa.iface = ifi;
e37d2e3e 968 if (ifa_flags & IFA_F_SECONDARY)
9b136840
JMM
969 ifa.flags |= IA_SECONDARY;
970
e37d2e3e
OZ
971 /* Ignore tentative addresses silently */
972 if (ifa_flags & IFA_F_TENTATIVE)
973 return;
9b136840 974
95616c82 975 /* IFA_LOCAL can be unset for IPv6 interfaces */
9b136840
JMM
976 ifa.ip = rta_get_ipa(a[IFA_LOCAL] ? : a[IFA_ADDRESS]);
977
d7661fbe 978 if (i->ifa_prefixlen > IP6_MAX_PREFIX_LENGTH)
9b136840
JMM
979 {
980 log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
981 new = 0;
982 }
d7661fbe 983 if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH)
9b136840
JMM
984 {
985 ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
986 net_fill_ip6(&ifa.prefix, rta_get_ip6(a[IFA_ADDRESS]), i->ifa_prefixlen);
987
988 /* It is either a host address or a peer address */
989 if (ipa_equal(ifa.ip, ifa.brd))
990 ifa.flags |= IA_HOST;
991 else
992 {
993 ifa.flags |= IA_PEER;
994 ifa.opposite = ifa.brd;
95616c82 995 }
9b136840
JMM
996 }
997 else
998 {
999 net_fill_ip6(&ifa.prefix, ipa_to_ip6(ifa.ip), i->ifa_prefixlen);
1000 net_normalize(&ifa.prefix);
1001
d7661fbe 1002 if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH - 1)
9b136840 1003 ifa.opposite = ipa_opposite_m1(ifa.ip);
95616c82
OZ
1004 }
1005
1006 scope = ipa_classify(ifa.ip);
1007 if (scope < 0)
1008 {
1009 log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
1010 return;
1011 }
1012 ifa.scope = scope & IADDR_SCOPE_MASK;
1013
9b136840 1014 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
95616c82
OZ
1015 ifi->index, ifi->name,
1016 new ? "added" : "removed",
9b136840 1017 ifa.ip, ifa.flags, ifa.prefix, ifa.brd, ifa.opposite);
3216eb03 1018
95616c82
OZ
1019 if (new)
1020 ifa_update(&ifa);
1021 else
1022 ifa_delete(&ifa);
3216eb03
OZ
1023
1024 if (!scan)
1025 if_end_partial_update(ifi);
95616c82
OZ
1026}
1027
9b136840
JMM
1028static void
1029nl_parse_addr(struct nlmsghdr *h, int scan)
1030{
1031 struct ifaddrmsg *i;
1032
1033 if (!(i = nl_checkin(h, sizeof(*i))))
1034 return;
1035
1036 int new = (h->nlmsg_type == RTM_NEWADDR);
1037
1038 switch (i->ifa_family)
1039 {
9b136840
JMM
1040 case AF_INET:
1041 return nl_parse_addr4(i, scan, new);
29a64162 1042
9b136840
JMM
1043 case AF_INET6:
1044 return nl_parse_addr6(i, scan, new);
9b136840
JMM
1045 }
1046}
1047
95616c82
OZ
1048void
1049kif_do_scan(struct kif_proto *p UNUSED)
1050{
1051 struct nlmsghdr *h;
1052
1053 if_start_update();
1054
86c3eea0 1055 nl_request_dump(AF_UNSPEC, RTM_GETLINK);
95616c82
OZ
1056 while (h = nl_get_scan())
1057 if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)
1058 nl_parse_link(h, 1);
1059 else
1060 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
29a64162 1061
d7661fbe 1062 nl_request_dump(AF_INET, RTM_GETADDR);
95616c82
OZ
1063 while (h = nl_get_scan())
1064 if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
3216eb03 1065 nl_parse_addr(h, 1);
95616c82
OZ
1066 else
1067 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
29a64162 1068
d7661fbe
JMM
1069 nl_request_dump(AF_INET6, RTM_GETADDR);
1070 while (h = nl_get_scan())
1071 if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
1072 nl_parse_addr(h, 1);
1073 else
1074 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
29a64162 1075
95616c82
OZ
1076 if_end_update();
1077}
1078
1079/*
1080 * Routes
1081 */
1082
9ddbfbdd
JMM
1083static inline u32
1084krt_table_id(struct krt_proto *p)
1085{
1086 return KRT_CF->sys.table_id;
1087}
1088
1089static HASH(struct krt_proto) nl_table_map;
1090
29a64162
OZ
1091#define RTH_KEY(p) p->af, krt_table_id(p)
1092#define RTH_NEXT(p) p->sys.hash_next
1093#define RTH_EQ(a1,i1,a2,i2) a1 == a2 && i1 == i2
1094#define RTH_FN(a,i) a ^ u32_hash(i)
9ddbfbdd
JMM
1095
1096#define RTH_REHASH rth_rehash
1097#define RTH_PARAMS /8, *2, 2, 2, 6, 20
1098
1099HASH_DEFINE_REHASH_FN(RTH, struct krt_proto)
95616c82
OZ
1100
1101int
1102krt_capable(rte *e)
1103{
1104 rta *a = e->attrs;
1105
95616c82 1106 switch (a->dest)
62e64905 1107 {
4e276a89 1108 case RTD_UNICAST:
95616c82
OZ
1109 case RTD_BLACKHOLE:
1110 case RTD_UNREACHABLE:
1111 case RTD_PROHIBIT:
62e64905
OZ
1112 return 1;
1113
95616c82
OZ
1114 default:
1115 return 0;
62e64905 1116 }
95616c82
OZ
1117}
1118
1119static inline int
4e276a89 1120nh_bufsize(struct nexthop *nh)
95616c82
OZ
1121{
1122 int rv = 0;
1123 for (; nh != NULL; nh = nh->next)
9fdf9d29 1124 rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr)));
95616c82
OZ
1125 return rv;
1126}
1127
1128static int
4e276a89 1129nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int dest, struct nexthop *nh)
95616c82
OZ
1130{
1131 eattr *ea;
1132 net *net = e->net;
1133 rta *a = e->attrs;
4e276a89 1134 int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(&(a->nh));
4adcb9df 1135 u32 priority = 0;
a8caff32 1136
95616c82
OZ
1137 struct {
1138 struct nlmsghdr h;
1139 struct rtmsg r;
a8caff32
JMM
1140 char buf[0];
1141 } *r;
1142
1143 int rsize = sizeof(*r) + bufsize;
1144 r = alloca(rsize);
95616c82 1145
cc5b93f7 1146 DBG("nl_send_route(%N,op=%x)\n", net->n.addr, op);
95616c82 1147
a8caff32
JMM
1148 bzero(&r->h, sizeof(r->h));
1149 bzero(&r->r, sizeof(r->r));
cc5b93f7 1150 r->h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE;
a8caff32 1151 r->h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
cc5b93f7 1152 r->h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK;
95616c82 1153
a8caff32
JMM
1154 r->r.rtm_family = p->af;
1155 r->r.rtm_dst_len = net_pxlen(net->n.addr);
1156 r->r.rtm_protocol = RTPROT_BIRD;
7074be22 1157 r->r.rtm_scope = RT_SCOPE_NOWHERE;
d14f8c3c
JMM
1158 if (p->af == AF_MPLS)
1159 {
1160 u32 label = net_mpls(net->n.addr);
1161 nl_add_attr_mpls(&r->h, rsize, RTA_DST, 1, &label);
1162 }
1163 else
1164 nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(net->n.addr));
95616c82 1165
2feaa693
OZ
1166 /*
1167 * Strange behavior for RTM_DELROUTE:
1168 * 1) rtm_family is ignored in IPv6, works for IPv4
1169 * 2) not setting RTA_PRIORITY is different from setting default value (on IPv6)
1170 * 3) not setting RTA_PRIORITY is equivalent to setting 0, which is wildcard
1171 */
1172
9ddbfbdd 1173 if (krt_table_id(p) < 256)
a8caff32 1174 r->r.rtm_table = krt_table_id(p);
9ddbfbdd 1175 else
a8caff32 1176 nl_add_attr_u32(&r->h, rsize, RTA_TABLE, krt_table_id(p));
9ddbfbdd 1177
4adcb9df
OZ
1178 if (a->source == RTS_DUMMY)
1179 priority = e->u.krt.metric;
1180 else if (KRT_CF->sys.metric)
1181 priority = KRT_CF->sys.metric;
1182 else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC)))
1183 priority = ea->u.data;
78a2cc28 1184
4adcb9df 1185 if (priority)
d1b8fe93 1186 nl_add_attr_u32(&r->h, rsize, RTA_PRIORITY, priority);
78a2cc28 1187
2feaa693
OZ
1188 /* For route delete, we do not specify remaining route attributes */
1189 if (op == NL_OP_DELETE)
1190 goto dest;
78a2cc28 1191
6e75d0d2
OZ
1192 /* Default scope is LINK for device routes, UNIVERSE otherwise */
1193 if (ea = ea_find(eattrs, EA_KRT_SCOPE))
cc5b93f7 1194 r->r.rtm_scope = ea->u.data;
6e75d0d2 1195 else
4e276a89 1196 r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
95616c82
OZ
1197
1198 if (ea = ea_find(eattrs, EA_KRT_PREFSRC))
a8caff32 1199 nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data);
95616c82
OZ
1200
1201 if (ea = ea_find(eattrs, EA_KRT_REALM))
a8caff32 1202 nl_add_attr_u32(&r->h, rsize, RTA_FLOW, ea->u.data);
95616c82 1203
9fdf9d29
OZ
1204
1205 u32 metrics[KRT_METRICS_MAX];
1206 metrics[0] = 0;
1207
1208 struct ea_walk_state ews = { .eattrs = eattrs };
1209 while (ea = ea_walk(&ews, EA_KRT_METRICS, KRT_METRICS_MAX))
1210 {
1211 int id = ea->id - EA_KRT_METRICS;
1212 metrics[0] |= 1 << id;
1213 metrics[id] = ea->u.data;
1214 }
1215
1216 if (metrics[0])
a8caff32 1217 nl_add_metrics(&r->h, rsize, metrics, KRT_METRICS_MAX);
9fdf9d29
OZ
1218
1219
2feaa693 1220dest:
2feaa693 1221 switch (dest)
95616c82 1222 {
4e276a89 1223 case RTD_UNICAST:
a8caff32 1224 r->r.rtm_type = RTN_UNICAST;
4e276a89 1225 if (nh->next && !krt_ecmp6(p))
d14f8c3c 1226 nl_add_multipath(&r->h, rsize, nh, p->af);
4e276a89
JMM
1227 else
1228 {
1229 nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->iface->index);
d14f8c3c 1230 nl_add_nexthop(&r->h, rsize, nh, p->af);
4e276a89 1231 }
95616c82
OZ
1232 break;
1233 case RTD_BLACKHOLE:
a8caff32 1234 r->r.rtm_type = RTN_BLACKHOLE;
95616c82
OZ
1235 break;
1236 case RTD_UNREACHABLE:
a8caff32 1237 r->r.rtm_type = RTN_UNREACHABLE;
95616c82
OZ
1238 break;
1239 case RTD_PROHIBIT:
a8caff32 1240 r->r.rtm_type = RTN_PROHIBIT;
95616c82 1241 break;
2feaa693
OZ
1242 case RTD_NONE:
1243 break;
95616c82
OZ
1244 default:
1245 bug("krt_capable inconsistent with nl_send_route");
1246 }
1247
2feaa693 1248 /* Ignore missing for DELETE */
cc5b93f7 1249 return nl_exchange(&r->h, (op == NL_OP_DELETE));
2feaa693
OZ
1250}
1251
1252static inline int
1253nl_add_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs)
1254{
1255 rta *a = e->attrs;
1256 int err = 0;
1257
4e276a89 1258 if (krt_ecmp6(p) && a->nh.next)
2feaa693 1259 {
4e276a89 1260 struct nexthop *nh = &(a->nh);
2feaa693 1261
4e276a89 1262 err = nl_send_route(p, e, eattrs, NL_OP_ADD, RTD_UNICAST, nh);
2feaa693
OZ
1263 if (err < 0)
1264 return err;
1265
1266 for (nh = nh->next; nh; nh = nh->next)
4e276a89 1267 err += nl_send_route(p, e, eattrs, NL_OP_APPEND, RTD_UNICAST, nh);
2feaa693
OZ
1268
1269 return err;
1270 }
1271
4e276a89 1272 return nl_send_route(p, e, eattrs, NL_OP_ADD, a->dest, &(a->nh));
2feaa693
OZ
1273}
1274
1275static inline int
1276nl_delete_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs)
1277{
1278 int err = 0;
1279
1280 /* For IPv6, we just repeatedly request DELETE until we get error */
1281 do
4e276a89 1282 err = nl_send_route(p, e, eattrs, NL_OP_DELETE, RTD_NONE, NULL);
2feaa693
OZ
1283 while (krt_ecmp6(p) && !err);
1284
1285 return err;
95616c82
OZ
1286}
1287
1288void
1289krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old, struct ea_list *eattrs)
1290{
1291 int err = 0;
1292
1293 /*
2feaa693
OZ
1294 * We could use NL_OP_REPLACE, but route replace on Linux has some problems:
1295 *
1296 * 1) Does not check for matching rtm_protocol
1297 * 2) Has broken semantics for IPv6 ECMP
1298 * 3) Crashes some kernel version when used for IPv6 ECMP
1299 *
1300 * So we use NL_OP_DELETE and then NL_OP_ADD. We also do not trust the old
1301 * route value, so we do not try to optimize IPv6 ECMP reconfigurations.
95616c82
OZ
1302 */
1303
1304 if (old)
2feaa693 1305 nl_delete_rte(p, old, eattrs);
95616c82
OZ
1306
1307 if (new)
2feaa693 1308 err = nl_add_rte(p, new, eattrs);
95616c82
OZ
1309
1310 if (err < 0)
1311 n->n.flags |= KRF_SYNC_ERROR;
1312 else
1313 n->n.flags &= ~KRF_SYNC_ERROR;
1314}
1315
1316
4e276a89
JMM
1317static inline struct nexthop *
1318nl_alloc_nexthop(struct nl_parse_state *s, ip_addr gw, struct iface *iface, byte weight)
2feaa693 1319{
4e276a89 1320 struct nexthop *nh = lp_alloc(s->pool, sizeof(struct nexthop));
2feaa693
OZ
1321
1322 nh->gw = gw;
1323 nh->iface = iface;
1324 nh->next = NULL;
1325 nh->weight = weight;
1326
1327 return nh;
1328}
1329
1330static int
1331nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint priority, uint krt_type)
1332{
1333 /* Route merging must be active */
1334 if (!s->merge)
1335 return 0;
1336
1337 /* Saved and new route must have same network, proto/table, and priority */
1338 if ((s->net != net) || (s->proto != p) || (s->krt_metric != priority))
1339 return 0;
1340
1341 /* Both must be regular unicast routes */
1342 if ((s->krt_type != RTN_UNICAST) || (krt_type != RTN_UNICAST))
1343 return 0;
1344
1345 return 1;
1346}
1347
1348static void
1349nl_announce_route(struct nl_parse_state *s)
1350{
1351 rte *e = rte_get_temp(s->attrs);
1352 e->net = s->net;
1353 e->u.krt.src = s->krt_src;
1354 e->u.krt.proto = s->krt_proto;
1355 e->u.krt.seen = 0;
1356 e->u.krt.best = 0;
1357 e->u.krt.metric = s->krt_metric;
1358
1359 if (s->scan)
1360 krt_got_route(s->proto, e);
1361 else
1362 krt_got_route_async(s->proto, e, s->new);
1363
1364 s->net = NULL;
1365 s->attrs = NULL;
1366 s->proto = NULL;
1367 lp_flush(s->pool);
1368}
1369
1370static inline void
1371nl_parse_begin(struct nl_parse_state *s, int scan, int merge)
1372{
1373 memset(s, 0, sizeof (struct nl_parse_state));
1374 s->pool = nl_linpool;
1375 s->scan = scan;
1376 s->merge = merge;
1377}
1378
1379static inline void
1380nl_parse_end(struct nl_parse_state *s)
1381{
1382 if (s->net)
1383 nl_announce_route(s);
1384}
1385
1386
95616c82
OZ
1387#define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
1388
1389static void
2feaa693 1390nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
95616c82
OZ
1391{
1392 struct krt_proto *p;
1393 struct rtmsg *i;
ad276157 1394 struct rtattr *a[BIRD_RTA_MAX];
95616c82
OZ
1395 int new = h->nlmsg_type == RTM_NEWROUTE;
1396
29a64162 1397 net_addr dst;
95616c82 1398 u32 oif = ~0;
29a64162 1399 u32 table_id;
2feaa693 1400 u32 priority = 0;
6e75d0d2 1401 u32 def_scope = RT_SCOPE_UNIVERSE;
95616c82
OZ
1402 int src;
1403
ad276157 1404 if (!(i = nl_checkin(h, sizeof(*i))))
95616c82 1405 return;
ad276157
JMM
1406
1407 switch (i->rtm_family)
95616c82 1408 {
29a64162
OZ
1409 case AF_INET:
1410 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want4, a, sizeof(a)))
1411 return;
1412
1413 if (a[RTA_DST])
1414 net_fill_ip4(&dst, rta_get_ip4(a[RTA_DST]), i->rtm_dst_len);
1415 else
1416 net_fill_ip4(&dst, IP4_NONE, 0);
1417 break;
1418
cc5b93f7
OZ
1419 case AF_INET6:
1420 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a)))
1421 return;
29a64162
OZ
1422
1423 if (a[RTA_DST])
1424 net_fill_ip6(&dst, rta_get_ip6(a[RTA_DST]), i->rtm_dst_len);
1425 else
1426 net_fill_ip6(&dst, IP6_NONE, 0);
1427 break;
1428
d14f8c3c
JMM
1429 case AF_MPLS:
1430 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want_mpls, a, sizeof(a)))
1431 return;
1432
ed610044
OZ
1433 if (!a[RTA_DST])
1434 SKIP("MPLS route without RTA_DST");
1435
1436 if (rta_get_mpls(a[RTA_DST], rta_mpls_stack) != 1)
1437 SKIP("MPLS route with multi-label RTA_DST");
1438
1439 net_fill_mpls(&dst, rta_mpls_stack[0]);
d14f8c3c
JMM
1440 break;
1441
29a64162
OZ
1442 default:
1443 return;
95616c82
OZ
1444 }
1445
95616c82 1446 if (a[RTA_OIF])
acb04cfd 1447 oif = rta_get_u32(a[RTA_OIF]);
95616c82 1448
9ddbfbdd 1449 if (a[RTA_TABLE])
29a64162 1450 table_id = rta_get_u32(a[RTA_TABLE]);
9ddbfbdd 1451 else
29a64162 1452 table_id = i->rtm_table;
9ddbfbdd 1453
29a64162
OZ
1454 /* Do we know this table? */
1455 p = HASH_FIND(nl_table_map, RTH, i->rtm_family, table_id);
95616c82 1456 if (!p)
9ddbfbdd 1457 SKIP("unknown table %d\n", table);
95616c82 1458
95616c82
OZ
1459 if (a[RTA_IIF])
1460 SKIP("IIF set\n");
29a64162 1461
95616c82
OZ
1462 if (i->rtm_tos != 0) /* We don't support TOS */
1463 SKIP("TOS %02x\n", i->rtm_tos);
95616c82 1464
2feaa693 1465 if (s->scan && !new)
95616c82
OZ
1466 SKIP("RTM_DELROUTE in scan\n");
1467
2feaa693
OZ
1468 if (a[RTA_PRIORITY])
1469 priority = rta_get_u32(a[RTA_PRIORITY]);
1470
9b136840 1471 int c = net_classify(&dst);
95616c82
OZ
1472 if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
1473 SKIP("strange class/scope\n");
1474
95616c82
OZ
1475 switch (i->rtm_protocol)
1476 {
1477 case RTPROT_UNSPEC:
1478 SKIP("proto unspec\n");
1479
1480 case RTPROT_REDIRECT:
1481 src = KRT_SRC_REDIRECT;
1482 break;
1483
1484 case RTPROT_KERNEL:
1485 src = KRT_SRC_KERNEL;
1486 return;
1487
1488 case RTPROT_BIRD:
2feaa693 1489 if (!s->scan)
95616c82
OZ
1490 SKIP("echo\n");
1491 src = KRT_SRC_BIRD;
1492 break;
1493
1494 case RTPROT_BOOT:
1495 default:
1496 src = KRT_SRC_ALIEN;
1497 }
1498
f4a60a9b 1499 net *net = net_get(p->p.main_channel->table, &dst);
95616c82 1500
2feaa693
OZ
1501 if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type))
1502 nl_announce_route(s);
1503
d14f8c3c 1504 rta *ra = lp_allocz(s->pool, RTA_MAX_SIZE);
2feaa693
OZ
1505 ra->src = p->p.main_source;
1506 ra->source = RTS_INHERIT;
1507 ra->scope = SCOPE_UNIVERSE;
95616c82
OZ
1508
1509 switch (i->rtm_type)
1510 {
1511 case RTN_UNICAST:
62e64905 1512 ra->dest = RTD_UNICAST;
95616c82 1513
ad276157 1514 if (a[RTA_MULTIPATH] && (i->rtm_family == AF_INET))
95616c82 1515 {
4e276a89
JMM
1516 struct nexthop *nh = nl_parse_multipath(p, a[RTA_MULTIPATH]);
1517 if (!nh)
95616c82 1518 {
fe9f1a6d 1519 log(L_ERR "KRT: Received strange multipath route %N", net->n.addr);
95616c82
OZ
1520 return;
1521 }
9fdf9d29 1522
62e64905 1523 ra->nh = *nh;
95616c82
OZ
1524 break;
1525 }
1526
4e276a89
JMM
1527 ra->nh.iface = if_find_by_index(oif);
1528 if (!ra->nh.iface)
95616c82 1529 {
fe9f1a6d 1530 log(L_ERR "KRT: Received route %N with unknown ifindex %u", net->n.addr, oif);
95616c82
OZ
1531 return;
1532 }
1533
d14f8c3c 1534 if ((i->rtm_family != AF_MPLS) && a[RTA_GATEWAY] || (i->rtm_family == AF_MPLS) && a[RTA_VIA])
95616c82 1535 {
d14f8c3c
JMM
1536 if (i->rtm_family == AF_MPLS)
1537 ra->nh.gw = rta_get_via(a[RTA_VIA]);
1538 else
1539 ra->nh.gw = rta_get_ipa(a[RTA_GATEWAY]);
95616c82
OZ
1540
1541 /* Silently skip strange 6to4 routes */
0bf95f99 1542 const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96);
4e276a89 1543 if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->nh.gw, (net_addr *) &sit))
95616c82
OZ
1544 return;
1545
23c212e7 1546 neighbor *nbr;
d14f8c3c 1547 nbr = neigh_find2(&p->p, &(ra->nh.gw), ra->nh.iface,
23c212e7
OZ
1548 (i->rtm_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0);
1549 if (!nbr || (nbr->scope == SCOPE_HOST))
95616c82 1550 {
4e276a89
JMM
1551 log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr,
1552 ra->nh.gw);
95616c82
OZ
1553 return;
1554 }
1555 }
95616c82
OZ
1556
1557 break;
1558 case RTN_BLACKHOLE:
2feaa693 1559 ra->dest = RTD_BLACKHOLE;
95616c82
OZ
1560 break;
1561 case RTN_UNREACHABLE:
2feaa693 1562 ra->dest = RTD_UNREACHABLE;
95616c82
OZ
1563 break;
1564 case RTN_PROHIBIT:
2feaa693 1565 ra->dest = RTD_PROHIBIT;
95616c82
OZ
1566 break;
1567 /* FIXME: What about RTN_THROW? */
1568 default:
1569 SKIP("type %d\n", i->rtm_type);
1570 return;
1571 }
1572
d14f8c3c
JMM
1573 int labels = 0;
1574 if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !ra->nh.next)
1575 labels = rta_get_mpls(a[RTA_NEWDST], ra->nh.label);
1576
1577 if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !ra->nh.next)
1578 {
1579 switch (rta_get_u16(a[RTA_ENCAP_TYPE]))
1580 {
1581 case LWTUNNEL_ENCAP_MPLS:
1582 {
1583 struct rtattr *enca[BIRD_RTA_MAX];
1584 nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
1585 nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
1586 labels = rta_get_mpls(enca[RTA_DST], ra->nh.label);
1587 break;
1588 }
1589 default:
1590 SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE]));
1591 break;
1592 }
1593 }
1594
1595 if (labels < 0)
1596 {
1597 log(L_WARN "KRT: Too long MPLS stack received, ignoring.");
1598 ra->nh.labels = 0;
1599 }
1600 else
1601 ra->nh.labels = labels;
1602
1603 rte *e = rte_get_temp(ra);
1604 e->net = net;
1605 e->u.krt.src = src;
1606 e->u.krt.proto = i->rtm_protocol;
1607 e->u.krt.seen = 0;
1608 e->u.krt.best = 0;
1609 e->u.krt.metric = 0;
1610
6e75d0d2
OZ
1611 if (i->rtm_scope != def_scope)
1612 {
1613 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1614 ea->next = ra->eattrs;
1615 ra->eattrs = ea;
1616 ea->flags = EALF_SORTED;
1617 ea->count = 1;
1618 ea->attrs[0].id = EA_KRT_SCOPE;
1619 ea->attrs[0].flags = 0;
1620 ea->attrs[0].type = EAF_TYPE_INT;
1621 ea->attrs[0].u.data = i->rtm_scope;
1622 }
95616c82 1623
d14f8c3c
JMM
1624 if (a[RTA_PRIORITY])
1625 e->u.krt.metric = rta_get_u32(a[RTA_PRIORITY]);
1626
95616c82
OZ
1627 if (a[RTA_PREFSRC])
1628 {
9b136840 1629 ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]);
95616c82 1630
2feaa693
OZ
1631 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1632 ea->next = ra->eattrs;
1633 ra->eattrs = ea;
95616c82
OZ
1634 ea->flags = EALF_SORTED;
1635 ea->count = 1;
1636 ea->attrs[0].id = EA_KRT_PREFSRC;
1637 ea->attrs[0].flags = 0;
1638 ea->attrs[0].type = EAF_TYPE_IP_ADDRESS;
2feaa693 1639 ea->attrs[0].u.ptr = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps));
95616c82
OZ
1640 ea->attrs[0].u.ptr->length = sizeof(ps);
1641 memcpy(ea->attrs[0].u.ptr->data, &ps, sizeof(ps));
1642 }
1643
1644 if (a[RTA_FLOW])
1645 {
2feaa693
OZ
1646 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1647 ea->next = ra->eattrs;
1648 ra->eattrs = ea;
95616c82
OZ
1649 ea->flags = EALF_SORTED;
1650 ea->count = 1;
1651 ea->attrs[0].id = EA_KRT_REALM;
1652 ea->attrs[0].flags = 0;
1653 ea->attrs[0].type = EAF_TYPE_INT;
acb04cfd 1654 ea->attrs[0].u.data = rta_get_u32(a[RTA_FLOW]);
95616c82
OZ
1655 }
1656
9fdf9d29
OZ
1657 if (a[RTA_METRICS])
1658 {
1659 u32 metrics[KRT_METRICS_MAX];
2feaa693 1660 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr));
9fdf9d29
OZ
1661 int t, n = 0;
1662
1663 if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0)
1664 {
fe9f1a6d 1665 log(L_ERR "KRT: Received route %N with strange RTA_METRICS attribute", net->n.addr);
9fdf9d29
OZ
1666 return;
1667 }
1668
1669 for (t = 1; t < KRT_METRICS_MAX; t++)
1670 if (metrics[0] & (1 << t))
1671 {
1672 ea->attrs[n].id = EA_CODE(EAP_KRT, KRT_METRICS_OFFSET + t);
1673 ea->attrs[n].flags = 0;
1674 ea->attrs[n].type = EAF_TYPE_INT; /* FIXME: Some are EAF_TYPE_BITFIELD */
1675 ea->attrs[n].u.data = metrics[t];
1676 n++;
1677 }
1678
1679 if (n > 0)
1680 {
2feaa693 1681 ea->next = ra->eattrs;
9fdf9d29
OZ
1682 ea->flags = EALF_SORTED;
1683 ea->count = n;
2feaa693 1684 ra->eattrs = ea;
9fdf9d29
OZ
1685 }
1686 }
1687
2feaa693
OZ
1688 /*
1689 * Ideally, now we would send the received route to the rest of kernel code.
1690 * But IPv6 ECMP routes are sent as a sequence of routes, so we postpone it
1691 * and merge next hops until the end of the sequence.
1692 */
1693
1694 if (!s->net)
1695 {
1696 /* Store the new route */
1697 s->net = net;
1698 s->attrs = ra;
1699 s->proto = p;
1700 s->new = new;
1701 s->krt_src = src;
1702 s->krt_type = i->rtm_type;
1703 s->krt_proto = i->rtm_protocol;
1704 s->krt_metric = priority;
1705 }
95616c82 1706 else
2feaa693
OZ
1707 {
1708 /* Merge next hops with the stored route */
62e64905 1709 rta *oa = s->attrs;
2feaa693 1710
62e64905
OZ
1711 struct nexthop *nhs = &oa->nh;
1712 nexthop_insert(&nhs, &ra->nh);
1713
1714 /* Perhaps new nexthop is inserted at the first position */
1715 if (nhs == &ra->nh)
1716 {
1717 /* Swap rtas */
1718 s->attrs = ra;
1719
1720 /* Keep old eattrs */
1721 ra->eattrs = oa->eattrs;
1722 }
2feaa693 1723 }
95616c82
OZ
1724}
1725
1726void
1727krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
1728{
1729 struct nlmsghdr *h;
2feaa693 1730 struct nl_parse_state s;
95616c82 1731
cc5b93f7 1732 nl_parse_begin(&s, 1, 0);
d7661fbe 1733 nl_request_dump(AF_INET, RTM_GETROUTE);
95616c82
OZ
1734 while (h = nl_get_scan())
1735 if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
2feaa693 1736 nl_parse_route(&s, h);
95616c82
OZ
1737 else
1738 log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
cc5b93f7 1739 nl_parse_end(&s);
29a64162 1740
cc5b93f7 1741 nl_parse_begin(&s, 1, 1);
d7661fbe
JMM
1742 nl_request_dump(AF_INET6, RTM_GETROUTE);
1743 while (h = nl_get_scan())
1744 if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
cc5b93f7 1745 nl_parse_route(&s, h);
d7661fbe
JMM
1746 else
1747 log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
2feaa693 1748 nl_parse_end(&s);
d14f8c3c
JMM
1749
1750 nl_parse_begin(&s, 1, 1);
1751 nl_request_dump(AF_MPLS, RTM_GETROUTE);
1752 while (h = nl_get_scan())
1753 if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
1754 nl_parse_route(&s, h);
1755 else
1756 log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
1757 nl_parse_end(&s);
95616c82
OZ
1758}
1759
1760/*
1761 * Asynchronous Netlink interface
1762 */
1763
1764static sock *nl_async_sk; /* BIRD socket for asynchronous notifications */
1765static byte *nl_async_rx_buffer; /* Receive buffer */
1766
1767static void
1768nl_async_msg(struct nlmsghdr *h)
1769{
2feaa693
OZ
1770 struct nl_parse_state s;
1771
95616c82
OZ
1772 switch (h->nlmsg_type)
1773 {
1774 case RTM_NEWROUTE:
1775 case RTM_DELROUTE:
1776 DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);
2feaa693
OZ
1777 nl_parse_begin(&s, 0, 0);
1778 nl_parse_route(&s, h);
1779 nl_parse_end(&s);
95616c82
OZ
1780 break;
1781 case RTM_NEWLINK:
1782 case RTM_DELLINK:
1783 DBG("KRT: Received async link notification (%d)\n", h->nlmsg_type);
1e4891e4
OZ
1784 if (kif_proto)
1785 nl_parse_link(h, 0);
95616c82
OZ
1786 break;
1787 case RTM_NEWADDR:
1788 case RTM_DELADDR:
1789 DBG("KRT: Received async address notification (%d)\n", h->nlmsg_type);
1e4891e4
OZ
1790 if (kif_proto)
1791 nl_parse_addr(h, 0);
95616c82
OZ
1792 break;
1793 default:
1794 DBG("KRT: Received unknown async notification (%d)\n", h->nlmsg_type);
1795 }
1796}
1797
1798static int
3e236955 1799nl_async_hook(sock *sk, uint size UNUSED)
95616c82
OZ
1800{
1801 struct iovec iov = { nl_async_rx_buffer, NL_RX_SIZE };
1802 struct sockaddr_nl sa;
31e9e101
ST
1803 struct msghdr m = {
1804 .msg_name = &sa,
1805 .msg_namelen = sizeof(sa),
1806 .msg_iov = &iov,
1807 .msg_iovlen = 1,
1808 };
95616c82
OZ
1809 struct nlmsghdr *h;
1810 int x;
ae80a2de 1811 uint len;
95616c82
OZ
1812
1813 x = recvmsg(sk->fd, &m, 0);
1814 if (x < 0)
1815 {
1816 if (errno == ENOBUFS)
1817 {
1818 /*
1819 * Netlink reports some packets have been thrown away.
1820 * One day we might react to it by asking for route table
1821 * scan in near future.
1822 */
2c33da50 1823 log(L_WARN "Kernel dropped some netlink messages, will resync on next scan.");
95616c82
OZ
1824 return 1; /* More data are likely to be ready */
1825 }
1826 else if (errno != EWOULDBLOCK)
1827 log(L_ERR "Netlink recvmsg: %m");
1828 return 0;
1829 }
1830 if (sa.nl_pid) /* It isn't from the kernel */
1831 {
1832 DBG("Non-kernel packet\n");
1833 return 1;
1834 }
1835 h = (void *) nl_async_rx_buffer;
1836 len = x;
1837 if (m.msg_flags & MSG_TRUNC)
1838 {
1839 log(L_WARN "Netlink got truncated asynchronous message");
1840 return 1;
1841 }
1842 while (NLMSG_OK(h, len))
1843 {
1844 nl_async_msg(h);
1845 h = NLMSG_NEXT(h, len);
1846 }
1847 if (len)
1848 log(L_WARN "nl_async_hook: Found packet remnant of size %d", len);
1849 return 1;
1850}
1851
ccd2a3ed
JMM
1852static void
1853nl_async_err_hook(sock *sk, int e UNUSED)
1854{
1855 nl_async_hook(sk, 0);
1856}
1857
95616c82
OZ
1858static void
1859nl_open_async(void)
1860{
1861 sock *sk;
1862 struct sockaddr_nl sa;
1863 int fd;
95616c82 1864
f83ce94d 1865 if (nl_async_sk)
95616c82 1866 return;
95616c82
OZ
1867
1868 DBG("KRT: Opening async netlink socket\n");
1869
1870 fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
1871 if (fd < 0)
1872 {
1873 log(L_ERR "Unable to open asynchronous rtnetlink socket: %m");
1874 return;
1875 }
1876
1877 bzero(&sa, sizeof(sa));
1878 sa.nl_family = AF_NETLINK;
29a64162
OZ
1879 sa.nl_groups = RTMGRP_LINK |
1880 RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE |
1881 RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE;
1882
95616c82
OZ
1883 if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
1884 {
1885 log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m");
f83ce94d 1886 close(fd);
95616c82
OZ
1887 return;
1888 }
1889
f83ce94d
OZ
1890 nl_async_rx_buffer = xmalloc(NL_RX_SIZE);
1891
95616c82
OZ
1892 sk = nl_async_sk = sk_new(krt_pool);
1893 sk->type = SK_MAGIC;
1894 sk->rx_hook = nl_async_hook;
ccd2a3ed 1895 sk->err_hook = nl_async_err_hook;
95616c82 1896 sk->fd = fd;
05476c4d 1897 if (sk_open(sk) < 0)
95616c82 1898 bug("Netlink: sk_open failed");
95616c82
OZ
1899}
1900
9ddbfbdd 1901
95616c82
OZ
1902/*
1903 * Interface to the UNIX krt module
1904 */
1905
95616c82 1906void
9ddbfbdd
JMM
1907krt_sys_io_init(void)
1908{
05d47bd5 1909 nl_linpool = lp_new_default(krt_pool);
9ddbfbdd
JMM
1910 HASH_INIT(nl_table_map, krt_pool, 6);
1911}
1912
1913int
c6964c30 1914krt_sys_start(struct krt_proto *p)
95616c82 1915{
29a64162 1916 struct krt_proto *old = HASH_FIND(nl_table_map, RTH, p->af, krt_table_id(p));
9ddbfbdd
JMM
1917
1918 if (old)
1919 {
1920 log(L_ERR "%s: Kernel table %u already registered by %s",
1921 p->p.name, krt_table_id(p), old->p.name);
1922 return 0;
1923 }
1924
1925 HASH_INSERT2(nl_table_map, RTH, krt_pool, p);
c6964c30
OZ
1926
1927 nl_open();
1928 nl_open_async();
9ddbfbdd
JMM
1929
1930 return 1;
95616c82
OZ
1931}
1932
1933void
9ddbfbdd 1934krt_sys_shutdown(struct krt_proto *p)
95616c82 1935{
9ddbfbdd 1936 HASH_REMOVE2(nl_table_map, RTH, krt_pool, p);
95616c82
OZ
1937}
1938
1939int
1940krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
1941{
4adcb9df 1942 return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric);
95616c82
OZ
1943}
1944
95616c82
OZ
1945void
1946krt_sys_init_config(struct krt_config *cf)
1947{
1948 cf->sys.table_id = RT_TABLE_MAIN;
bff21441 1949 cf->sys.metric = 32;
95616c82
OZ
1950}
1951
1952void
1953krt_sys_copy_config(struct krt_config *d, struct krt_config *s)
1954{
1955 d->sys.table_id = s->sys.table_id;
4adcb9df 1956 d->sys.metric = s->sys.metric;
95616c82
OZ
1957}
1958
9fdf9d29
OZ
1959static const char *krt_metrics_names[KRT_METRICS_MAX] = {
1960 NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss",
1961 "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack"
1962};
1963
1964static const char *krt_features_names[KRT_FEATURES_MAX] = {
1965 "ecn", NULL, NULL, "allfrag"
1966};
1967
1968int
1969krt_sys_get_attr(eattr *a, byte *buf, int buflen UNUSED)
1970{
1971 switch (a->id)
1972 {
1973 case EA_KRT_PREFSRC:
1974 bsprintf(buf, "prefsrc");
1975 return GA_NAME;
1976
1977 case EA_KRT_REALM:
1978 bsprintf(buf, "realm");
1979 return GA_NAME;
1980
6e75d0d2
OZ
1981 case EA_KRT_SCOPE:
1982 bsprintf(buf, "scope");
1983 return GA_NAME;
1984
9fdf9d29
OZ
1985 case EA_KRT_LOCK:
1986 buf += bsprintf(buf, "lock:");
1987 ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX);
1988 return GA_FULL;
1989
1990 case EA_KRT_FEATURES:
1991 buf += bsprintf(buf, "features:");
1992 ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX);
1993 return GA_FULL;
1994
1995 default:;
1996 int id = (int)EA_ID(a->id) - KRT_METRICS_OFFSET;
1997 if (id > 0 && id < KRT_METRICS_MAX)
1998 {
1999 bsprintf(buf, "%s", krt_metrics_names[id]);
2000 return GA_NAME;
2001 }
2002
2003 return GA_UNKNOWN;
2004 }
2005}
2006
95616c82
OZ
2007
2008
2009void
2010kif_sys_start(struct kif_proto *p UNUSED)
2011{
2012 nl_open();
2013 nl_open_async();
2014}
2015
2016void
2017kif_sys_shutdown(struct kif_proto *p UNUSED)
2018{
2019}