]> git.ipfire.org Git - thirdparty/bird.git/blame - sysdep/linux/netlink.c
KRT: Allow to learn routes with RTPROT_KERNEL
[thirdparty/bird.git] / sysdep / linux / netlink.c
CommitLineData
95616c82
OZ
1/*
2 * BIRD -- Linux Netlink Interface
3 *
4 * (c) 1999--2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
9#include <stdio.h>
f83ce94d 10#include <unistd.h>
95616c82
OZ
11#include <fcntl.h>
12#include <sys/socket.h>
13#include <sys/uio.h>
14#include <errno.h>
15
16#undef LOCAL_DEBUG
17
18#include "nest/bird.h"
19#include "nest/route.h"
20#include "nest/protocol.h"
21#include "nest/iface.h"
4e276a89 22#include "lib/alloca.h"
7152e5ef
JMM
23#include "sysdep/unix/unix.h"
24#include "sysdep/unix/krt.h"
95616c82
OZ
25#include "lib/socket.h"
26#include "lib/string.h"
9ddbfbdd 27#include "lib/hash.h"
95616c82
OZ
28#include "conf/conf.h"
29
176fc68a 30#include CONFIG_INCLUDE_NLSYS_H
8988264a 31
/* True when the address family of @p is AF_INET */
#define krt_ipv4(p) ((p)->af == AF_INET)

/* Default ECMP limit (consumers are outside this part of the file) */
const int rt_default_ecmp = 16;
35
2feaa693
OZ
36struct nl_parse_state
37{
722daa95 38 struct krt_proto *proto;
2feaa693
OZ
39 struct linpool *pool;
40 int scan;
2feaa693 41
722daa95 42 u32 rta_flow;
2feaa693
OZ
43};
44
95616c82
OZ
45/*
46 * Synchronous Netlink interface
47 */
48
49struct nl_sock
50{
51 int fd;
52 u32 seq;
53 byte *rx_buffer; /* Receive buffer */
54 struct nlmsghdr *last_hdr; /* Recently received packet */
ae80a2de 55 uint last_size;
95616c82
OZ
56};
57
e818f164 58#define NL_RX_SIZE 32768
95616c82 59
2feaa693
OZ
60#define NL_OP_DELETE 0
61#define NL_OP_ADD (NLM_F_CREATE|NLM_F_EXCL)
62#define NL_OP_REPLACE (NLM_F_CREATE|NLM_F_REPLACE)
63#define NL_OP_APPEND (NLM_F_CREATE|NLM_F_APPEND)
64
65static linpool *nl_linpool;
66
95616c82
OZ
67static struct nl_sock nl_scan = {.fd = -1}; /* Netlink socket for synchronous scan */
68static struct nl_sock nl_req = {.fd = -1}; /* Netlink socket for requests */
69
70static void
71nl_open_sock(struct nl_sock *nl)
72{
73 if (nl->fd < 0)
74 {
75 nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
76 if (nl->fd < 0)
77 die("Unable to open rtnetlink socket: %m");
574b2324 78 nl->seq = (u32) (current_time() TO_S); /* Or perhaps random_u32() ? */
95616c82
OZ
79 nl->rx_buffer = xmalloc(NL_RX_SIZE);
80 nl->last_hdr = NULL;
81 nl->last_size = 0;
82 }
83}
84
534d0a4b 85static int
ef614f29 86nl_set_strict_dump(struct nl_sock *nl UNUSED, int strict UNUSED)
e818f164 87{
bbc33f6e 88#ifdef SOL_NETLINK
534d0a4b
OZ
89 return setsockopt(nl->fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &strict, sizeof(strict));
90#else
91 return -1;
bbc33f6e 92#endif
e818f164
OZ
93}
94
81ee6cda
OZ
95static void
96nl_set_rcvbuf(int fd, uint val)
97{
98 if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val)) < 0)
99 log(L_WARN "KRT: Cannot set netlink rx buffer size to %u: %m", val);
100}
101
102static uint
103nl_cfg_rx_buffer_size(struct config *cfg)
104{
105 uint bufsize = 0;
106
107 struct proto_config *pc;
108 WALK_LIST(pc, cfg->protos)
109 if ((pc->protocol == &proto_unix_kernel) && !pc->disabled)
110 bufsize = MAX(bufsize, ((struct krt_config *) pc)->sys.netlink_rx_buffer);
111
112 return bufsize;
113}
114
115
95616c82
OZ
116static void
117nl_open(void)
118{
534d0a4b
OZ
119 if ((nl_scan.fd >= 0) && (nl_req.fd >= 0))
120 return;
121
95616c82
OZ
122 nl_open_sock(&nl_scan);
123 nl_open_sock(&nl_req);
e818f164 124
534d0a4b
OZ
125 if (nl_set_strict_dump(&nl_scan, 1) < 0)
126 {
127 log(L_WARN "KRT: Netlink strict checking failed, will scan all tables at once");
128 krt_use_shared_scan();
129 }
95616c82
OZ
130}
131
132static void
133nl_send(struct nl_sock *nl, struct nlmsghdr *nh)
134{
135 struct sockaddr_nl sa;
136
137 memset(&sa, 0, sizeof(sa));
138 sa.nl_family = AF_NETLINK;
139 nh->nlmsg_pid = 0;
140 nh->nlmsg_seq = ++(nl->seq);
53401bef 141 nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len);
95616c82
OZ
142 if (sendto(nl->fd, nh, nh->nlmsg_len, 0, (struct sockaddr *)&sa, sizeof(sa)) < 0)
143 die("rtnetlink sendto: %m");
144 nl->last_hdr = NULL;
145}
146
147static void
e818f164 148nl_request_dump_link(void)
95616c82
OZ
149{
150 struct {
151 struct nlmsghdr nh;
e818f164 152 struct ifinfomsg ifi;
641172c6 153 } req = {
e818f164
OZ
154 .nh.nlmsg_type = RTM_GETLINK,
155 .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
641172c6 156 .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
e818f164
OZ
157 .nh.nlmsg_seq = ++(nl_scan.seq),
158 .ifi.ifi_family = AF_UNSPEC,
641172c6 159 };
e818f164
OZ
160
161 send(nl_scan.fd, &req, sizeof(req), 0);
162 nl_scan.last_hdr = NULL;
95616c82
OZ
163}
164
e818f164
OZ
165static void
166nl_request_dump_addr(int af)
167{
168 struct {
169 struct nlmsghdr nh;
170 struct ifaddrmsg ifa;
171 } req = {
172 .nh.nlmsg_type = RTM_GETADDR,
173 .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
174 .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
175 .nh.nlmsg_seq = ++(nl_scan.seq),
176 .ifa.ifa_family = af,
177 };
178
179 send(nl_scan.fd, &req, sizeof(req), 0);
180 nl_scan.last_hdr = NULL;
181}
182
183static void
534d0a4b 184nl_request_dump_route(int af, int table_id)
e818f164
OZ
185{
186 struct {
187 struct nlmsghdr nh;
188 struct rtmsg rtm;
534d0a4b
OZ
189 struct rtattr rta;
190 u32 table_id;
e818f164
OZ
191 } req = {
192 .nh.nlmsg_type = RTM_GETROUTE,
193 .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)),
194 .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
195 .nh.nlmsg_seq = ++(nl_scan.seq),
196 .rtm.rtm_family = af,
197 };
198
534d0a4b
OZ
199 if (table_id < 256)
200 req.rtm.rtm_table = table_id;
201 else
202 {
203 req.rta.rta_type = RTA_TABLE;
204 req.rta.rta_len = RTA_LENGTH(4);
205 req.table_id = table_id;
206 req.nh.nlmsg_len = NLMSG_ALIGN(req.nh.nlmsg_len) + req.rta.rta_len;
207 }
208
209 send(nl_scan.fd, &req, req.nh.nlmsg_len, 0);
e818f164
OZ
210 nl_scan.last_hdr = NULL;
211}
212
213
95616c82
OZ
214static struct nlmsghdr *
215nl_get_reply(struct nl_sock *nl)
216{
217 for(;;)
218 {
219 if (!nl->last_hdr)
220 {
221 struct iovec iov = { nl->rx_buffer, NL_RX_SIZE };
222 struct sockaddr_nl sa;
31e9e101
ST
223 struct msghdr m = {
224 .msg_name = &sa,
225 .msg_namelen = sizeof(sa),
226 .msg_iov = &iov,
227 .msg_iovlen = 1,
228 };
95616c82
OZ
229 int x = recvmsg(nl->fd, &m, 0);
230 if (x < 0)
231 die("nl_get_reply: %m");
232 if (sa.nl_pid) /* It isn't from the kernel */
233 {
234 DBG("Non-kernel packet\n");
235 continue;
236 }
237 nl->last_size = x;
238 nl->last_hdr = (void *) nl->rx_buffer;
239 if (m.msg_flags & MSG_TRUNC)
240 bug("nl_get_reply: got truncated reply which should be impossible");
241 }
242 if (NLMSG_OK(nl->last_hdr, nl->last_size))
243 {
244 struct nlmsghdr *h = nl->last_hdr;
245 nl->last_hdr = NLMSG_NEXT(h, nl->last_size);
246 if (h->nlmsg_seq != nl->seq)
247 {
248 log(L_WARN "nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)",
249 h->nlmsg_seq, nl->seq);
250 continue;
251 }
252 return h;
253 }
254 if (nl->last_size)
255 log(L_WARN "nl_get_reply: Found packet remnant of size %d", nl->last_size);
256 nl->last_hdr = NULL;
257 }
258}
259
1123e707 260static struct tbf rl_netlink_err = TBF_DEFAULT_LOG_LIMITS;
95616c82
OZ
261
262static int
2feaa693 263nl_error(struct nlmsghdr *h, int ignore_esrch)
95616c82
OZ
264{
265 struct nlmsgerr *e;
266 int ec;
267
268 if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr)))
269 {
270 log(L_WARN "Netlink: Truncated error message received");
271 return ENOBUFS;
272 }
273 e = (struct nlmsgerr *) NLMSG_DATA(h);
176fc68a 274 ec = netlink_error_to_os(e->error);
2feaa693 275 if (ec && !(ignore_esrch && (ec == ESRCH)))
95616c82
OZ
276 log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec));
277 return ec;
278}
279
280static struct nlmsghdr *
281nl_get_scan(void)
282{
283 struct nlmsghdr *h = nl_get_reply(&nl_scan);
284
285 if (h->nlmsg_type == NLMSG_DONE)
286 return NULL;
287 if (h->nlmsg_type == NLMSG_ERROR)
288 {
2feaa693 289 nl_error(h, 0);
95616c82
OZ
290 return NULL;
291 }
292 return h;
293}
294
295static int
2feaa693 296nl_exchange(struct nlmsghdr *pkt, int ignore_esrch)
95616c82
OZ
297{
298 struct nlmsghdr *h;
299
300 nl_send(&nl_req, pkt);
301 for(;;)
302 {
303 h = nl_get_reply(&nl_req);
304 if (h->nlmsg_type == NLMSG_ERROR)
305 break;
306 log(L_WARN "nl_exchange: Unexpected reply received");
307 }
2feaa693 308 return nl_error(h, ignore_esrch) ? -1 : 0;
95616c82
OZ
309}
310
311/*
312 * Netlink attributes
313 */
314
315static int nl_attr_len;
316
317static void *
318nl_checkin(struct nlmsghdr *h, int lsize)
319{
320 nl_attr_len = h->nlmsg_len - NLMSG_LENGTH(lsize);
321 if (nl_attr_len < 0)
322 {
323 log(L_ERR "nl_checkin: underrun by %d bytes", -nl_attr_len);
324 return NULL;
325 }
326 return NLMSG_DATA(h);
327}
328
ad276157
JMM
329struct nl_want_attrs {
330 u8 defined:1;
331 u8 checksize:1;
332 u8 size;
333};
334
335
e3c0eca9 336#define BIRD_IFLA_MAX (IFLA_LINKINFO+1)
ad276157
JMM
337
338static struct nl_want_attrs ifla_attr_want[BIRD_IFLA_MAX] = {
339 [IFLA_IFNAME] = { 1, 0, 0 },
340 [IFLA_MTU] = { 1, 1, sizeof(u32) },
943478b0 341 [IFLA_MASTER] = { 1, 1, sizeof(u32) },
ad276157 342 [IFLA_WIRELESS] = { 1, 0, 0 },
e3c0eca9
OZ
343 [IFLA_LINKINFO] = { 1, 0, 0 },
344};
345
346#define BIRD_INFO_MAX (IFLA_INFO_DATA+1)
347
348static struct nl_want_attrs ifinfo_attr_want[BIRD_INFO_MAX] = {
349 [IFLA_INFO_KIND]= { 1, 0, 0 },
350 [IFLA_INFO_DATA]= { 1, 0, 0 },
ad276157
JMM
351};
352
29a64162 353
e37d2e3e 354#define BIRD_IFA_MAX (IFA_FLAGS+1)
ad276157 355
ad276157
JMM
356static struct nl_want_attrs ifa_attr_want4[BIRD_IFA_MAX] = {
357 [IFA_ADDRESS] = { 1, 1, sizeof(ip4_addr) },
358 [IFA_LOCAL] = { 1, 1, sizeof(ip4_addr) },
359 [IFA_BROADCAST] = { 1, 1, sizeof(ip4_addr) },
62e64905 360 [IFA_FLAGS] = { 1, 1, sizeof(u32) },
ad276157 361};
29a64162 362
ad276157
JMM
363static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = {
364 [IFA_ADDRESS] = { 1, 1, sizeof(ip6_addr) },
365 [IFA_LOCAL] = { 1, 1, sizeof(ip6_addr) },
e37d2e3e 366 [IFA_FLAGS] = { 1, 1, sizeof(u32) },
ad276157 367};
29a64162 368
ad276157 369
d14f8c3c 370#define BIRD_RTA_MAX (RTA_ENCAP+1)
ad276157 371
4e276a89 372static struct nl_want_attrs nexthop_attr_want4[BIRD_RTA_MAX] = {
ad276157 373 [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
53401bef 374 [RTA_VIA] = { 1, 0, 0 },
21f9acd2 375 [RTA_FLOW] = { 1, 1, sizeof(u32) },
d14f8c3c
JMM
376 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
377 [RTA_ENCAP] = { 1, 0, 0 },
378};
379
4ff15a75 380static struct nl_want_attrs nexthop_attr_want6[BIRD_RTA_MAX] = {
98bb80a2 381 [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) },
53401bef 382 [RTA_VIA] = { 1, 0, 0 },
21f9acd2 383 [RTA_FLOW] = { 1, 1, sizeof(u32) },
4ff15a75
OZ
384 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
385 [RTA_ENCAP] = { 1, 0, 0 },
386};
387
6b0f5f68 388#ifdef HAVE_MPLS_KERNEL
f1b5f179
KY
389static struct nl_want_attrs nexthop_attr_want_mpls[BIRD_RTA_MAX] = {
390 [RTA_VIA] = { 1, 0, 0 },
391 [RTA_NEWDST] = { 1, 0, 0 },
392};
393
d14f8c3c
JMM
394static struct nl_want_attrs encap_mpls_want[BIRD_RTA_MAX] = {
395 [RTA_DST] = { 1, 0, 0 },
ad276157 396};
6b0f5f68 397#endif
ad276157 398
ad276157
JMM
399static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = {
400 [RTA_DST] = { 1, 1, sizeof(ip4_addr) },
401 [RTA_OIF] = { 1, 1, sizeof(u32) },
402 [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
403 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
404 [RTA_PREFSRC] = { 1, 1, sizeof(ip4_addr) },
405 [RTA_METRICS] = { 1, 0, 0 },
406 [RTA_MULTIPATH] = { 1, 0, 0 },
407 [RTA_FLOW] = { 1, 1, sizeof(u32) },
408 [RTA_TABLE] = { 1, 1, sizeof(u32) },
53401bef 409 [RTA_VIA] = { 1, 0, 0 },
d14f8c3c
JMM
410 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
411 [RTA_ENCAP] = { 1, 0, 0 },
ad276157 412};
29a64162 413
ad276157
JMM
414static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = {
415 [RTA_DST] = { 1, 1, sizeof(ip6_addr) },
be17805c 416 [RTA_SRC] = { 1, 1, sizeof(ip6_addr) },
ad276157
JMM
417 [RTA_IIF] = { 1, 1, sizeof(u32) },
418 [RTA_OIF] = { 1, 1, sizeof(u32) },
419 [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) },
420 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
421 [RTA_PREFSRC] = { 1, 1, sizeof(ip6_addr) },
422 [RTA_METRICS] = { 1, 0, 0 },
98bb80a2 423 [RTA_MULTIPATH] = { 1, 0, 0 },
ad276157
JMM
424 [RTA_FLOW] = { 1, 1, sizeof(u32) },
425 [RTA_TABLE] = { 1, 1, sizeof(u32) },
53401bef 426 [RTA_VIA] = { 1, 0, 0 },
d14f8c3c
JMM
427 [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
428 [RTA_ENCAP] = { 1, 0, 0 },
429};
430
6b0f5f68 431#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
432static struct nl_want_attrs rtm_attr_want_mpls[BIRD_RTA_MAX] = {
433 [RTA_DST] = { 1, 1, sizeof(u32) },
434 [RTA_IIF] = { 1, 1, sizeof(u32) },
435 [RTA_OIF] = { 1, 1, sizeof(u32) },
436 [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
437 [RTA_METRICS] = { 1, 0, 0 },
f1b5f179 438 [RTA_MULTIPATH] = { 1, 0, 0 },
d14f8c3c
JMM
439 [RTA_FLOW] = { 1, 1, sizeof(u32) },
440 [RTA_TABLE] = { 1, 1, sizeof(u32) },
441 [RTA_VIA] = { 1, 0, 0 },
442 [RTA_NEWDST] = { 1, 0, 0 },
ad276157 443};
6b0f5f68 444#endif
ad276157
JMM
445
446
95616c82 447static int
ad276157 448nl_parse_attrs(struct rtattr *a, struct nl_want_attrs *want, struct rtattr **k, int ksize)
95616c82
OZ
449{
450 int max = ksize / sizeof(struct rtattr *);
451 bzero(k, ksize);
ad276157
JMM
452
453 for ( ; RTA_OK(a, nl_attr_len); a = RTA_NEXT(a, nl_attr_len))
95616c82 454 {
ad276157
JMM
455 if ((a->rta_type >= max) || !want[a->rta_type].defined)
456 continue;
457
458 if (want[a->rta_type].checksize && (RTA_PAYLOAD(a) != want[a->rta_type].size))
459 {
9b136840 460 log(L_ERR "nl_parse_attrs: Malformed attribute received");
ad276157
JMM
461 return 0;
462 }
463
464 k[a->rta_type] = a;
95616c82 465 }
ad276157 466
95616c82
OZ
467 if (nl_attr_len)
468 {
469 log(L_ERR "nl_parse_attrs: remnant of size %d", nl_attr_len);
470 return 0;
471 }
ad276157
JMM
472
473 return 1;
95616c82
OZ
474}
475
d14f8c3c
JMM
476static inline u16 rta_get_u16(struct rtattr *a)
477{ return *(u16 *) RTA_DATA(a); }
478
fce764f9 479static inline u32 rta_get_u32(struct rtattr *a)
acb04cfd
OZ
480{ return *(u32 *) RTA_DATA(a); }
481
482static inline ip4_addr rta_get_ip4(struct rtattr *a)
483{ return ip4_ntoh(*(ip4_addr *) RTA_DATA(a)); }
484
485static inline ip6_addr rta_get_ip6(struct rtattr *a)
486{ return ip6_ntoh(*(ip6_addr *) RTA_DATA(a)); }
487
9b136840
JMM
488static inline ip_addr rta_get_ipa(struct rtattr *a)
489{
490 if (RTA_PAYLOAD(a) == sizeof(ip4_addr))
491 return ipa_from_ip4(rta_get_ip4(a));
492 else
493 return ipa_from_ip6(rta_get_ip6(a));
494}
acb04cfd 495
d14f8c3c
JMM
496static inline ip_addr rta_get_via(struct rtattr *a)
497{
498 struct rtvia *v = RTA_DATA(a);
499 switch(v->rtvia_family) {
500 case AF_INET: return ipa_from_ip4(ip4_ntoh(*(ip4_addr *) v->rtvia_addr));
501 case AF_INET6: return ipa_from_ip6(ip6_ntoh(*(ip6_addr *) v->rtvia_addr));
502 }
503 return IPA_NONE;
504}
505
f8bcb037 506#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
507static u32 rta_mpls_stack[MPLS_MAX_LABEL_STACK];
508static inline int rta_get_mpls(struct rtattr *a, u32 *stack)
509{
2eaf65ec
OZ
510 if (!a)
511 return 0;
512
d14f8c3c
JMM
513 if (RTA_PAYLOAD(a) % 4)
514 log(L_WARN "KRT: Strange length of received MPLS stack: %u", RTA_PAYLOAD(a));
515
2eaf65ec
OZ
516 int labels = mpls_get(RTA_DATA(a), RTA_PAYLOAD(a) & ~0x3, stack);
517
518 if (labels < 0)
519 {
520 log(L_WARN "KRT: Too long MPLS stack received, ignoring");
521 labels = 0;
522 }
523
524 return labels;
d14f8c3c 525}
6b0f5f68 526#endif
d14f8c3c 527
9fdf9d29
OZ
528struct rtattr *
529nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint dlen)
95616c82 530{
9fdf9d29
OZ
531 uint pos = NLMSG_ALIGN(h->nlmsg_len);
532 uint len = RTA_LENGTH(dlen);
95616c82
OZ
533
534 if (pos + len > bufsize)
535 bug("nl_add_attr: packet buffer overflow");
9fdf9d29
OZ
536
537 struct rtattr *a = (struct rtattr *)((char *)h + pos);
95616c82
OZ
538 a->rta_type = code;
539 a->rta_len = len;
540 h->nlmsg_len = pos + len;
9fdf9d29
OZ
541
542 if (dlen > 0)
543 memcpy(RTA_DATA(a), data, dlen);
544
545 return a;
95616c82
OZ
546}
547
d14f8c3c
JMM
548static inline struct rtattr *
549nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code)
550{
551 return nl_add_attr(h, bufsize, code, NULL, 0);
552}
553
/*
 * Finish attribute @a: its length becomes the distance from its header to
 * the aligned end of message @h.
 */
static inline void
nl_close_attr(struct nlmsghdr *h, struct rtattr *a)
{
  char *msg_end = (char *) h + NLMSG_ALIGN(h->nlmsg_len);
  a->rta_len = msg_end - (char *) a;
}
559
560static inline void
561nl_add_attr_u16(struct nlmsghdr *h, uint bufsize, int code, u16 data)
562{
563 nl_add_attr(h, bufsize, code, &data, 2);
564}
565
95616c82 566static inline void
29a64162 567nl_add_attr_u32(struct nlmsghdr *h, uint bufsize, int code, u32 data)
95616c82
OZ
568{
569 nl_add_attr(h, bufsize, code, &data, 4);
570}
571
572static inline void
29a64162 573nl_add_attr_ip4(struct nlmsghdr *h, uint bufsize, int code, ip4_addr ip4)
95616c82 574{
29a64162
OZ
575 ip4 = ip4_hton(ip4);
576 nl_add_attr(h, bufsize, code, &ip4, sizeof(ip4));
577}
578
579static inline void
580nl_add_attr_ip6(struct nlmsghdr *h, uint bufsize, int code, ip6_addr ip6)
581{
582 ip6 = ip6_hton(ip6);
583 nl_add_attr(h, bufsize, code, &ip6, sizeof(ip6));
584}
585
586static inline void
587nl_add_attr_ipa(struct nlmsghdr *h, uint bufsize, int code, ip_addr ipa)
588{
589 if (ipa_is_ip4(ipa))
590 nl_add_attr_ip4(h, bufsize, code, ipa_to_ip4(ipa));
9b136840 591 else
29a64162 592 nl_add_attr_ip6(h, bufsize, code, ipa_to_ip6(ipa));
95616c82
OZ
593}
594
6b0f5f68 595#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
596static inline void
597nl_add_attr_mpls(struct nlmsghdr *h, uint bufsize, int code, int len, u32 *stack)
9fdf9d29 598{
d14f8c3c
JMM
599 char buf[len*4];
600 mpls_put(buf, len, stack);
601 nl_add_attr(h, bufsize, code, buf, len*4);
9fdf9d29 602}
95616c82
OZ
603
604static inline void
d14f8c3c 605nl_add_attr_mpls_encap(struct nlmsghdr *h, uint bufsize, int len, u32 *stack)
95616c82 606{
d14f8c3c
JMM
607 nl_add_attr_u16(h, bufsize, RTA_ENCAP_TYPE, LWTUNNEL_ENCAP_MPLS);
608
609 struct rtattr *nest = nl_open_attr(h, bufsize, RTA_ENCAP);
610 nl_add_attr_mpls(h, bufsize, RTA_DST, len, stack);
611 nl_close_attr(h, nest);
612}
613
614static inline void
615nl_add_attr_via(struct nlmsghdr *h, uint bufsize, ip_addr ipa)
616{
66acbc8d 617 struct rtvia *via = alloca(sizeof(struct rtvia) + 16);
d14f8c3c 618
62e64905
OZ
619 if (ipa_is_ip4(ipa))
620 {
d14f8c3c 621 via->rtvia_family = AF_INET;
62e64905 622 put_ip4(via->rtvia_addr, ipa_to_ip4(ipa));
66acbc8d 623 nl_add_attr(h, bufsize, RTA_VIA, via, sizeof(struct rtvia) + 4);
62e64905
OZ
624 }
625 else
626 {
d14f8c3c 627 via->rtvia_family = AF_INET6;
62e64905 628 put_ip6(via->rtvia_addr, ipa_to_ip6(ipa));
66acbc8d 629 nl_add_attr(h, bufsize, RTA_VIA, via, sizeof(struct rtvia) + 16);
d14f8c3c 630 }
95616c82 631}
6b0f5f68 632#endif
95616c82 633
9fdf9d29
OZ
634static inline struct rtnexthop *
635nl_open_nexthop(struct nlmsghdr *h, uint bufsize)
636{
637 uint pos = NLMSG_ALIGN(h->nlmsg_len);
638 uint len = RTNH_LENGTH(0);
639
640 if (pos + len > bufsize)
641 bug("nl_open_nexthop: packet buffer overflow");
642
643 h->nlmsg_len = pos + len;
644
645 return (void *)h + pos;
646}
647
/*
 * Finish nexthop @nh: its length becomes the distance from its header to
 * the aligned end of message @h.
 */
static inline void
nl_close_nexthop(struct nlmsghdr *h, struct rtnexthop *nh)
{
  char *msg_end = (char *) h + NLMSG_ALIGN(h->nlmsg_len);
  nh->rtnh_len = msg_end - (char *) nh;
}
95616c82 653
d14f8c3c 654static inline void
6b0f5f68 655nl_add_nexthop(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af UNUSED)
d14f8c3c 656{
6b0f5f68 657#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
658 if (nh->labels > 0)
659 if (af == AF_MPLS)
660 nl_add_attr_mpls(h, bufsize, RTA_NEWDST, nh->labels, nh->label);
661 else
662 nl_add_attr_mpls_encap(h, bufsize, nh->labels, nh->label);
663
664 if (ipa_nonzero(nh->gw))
53401bef
OZ
665 {
666 if (af == (ipa_is_ip4(nh->gw) ? AF_INET : AF_INET6))
d14f8c3c 667 nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
53401bef
OZ
668 else
669 nl_add_attr_via(h, bufsize, nh->gw);
670 }
6b0f5f68
MJM
671#else
672
673 if (ipa_nonzero(nh->gw))
674 nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
675#endif
d14f8c3c
JMM
676}
677
95616c82 678static void
21f9acd2 679nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af, ea_list *eattrs)
95616c82 680{
9fdf9d29 681 struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH);
21f9acd2 682 eattr *flow = ea_find(eattrs, EA_KRT_REALM);
9fdf9d29 683
95616c82 684 for (; nh; nh = nh->next)
9fdf9d29
OZ
685 {
686 struct rtnexthop *rtnh = nl_open_nexthop(h, bufsize);
95616c82 687
9fdf9d29
OZ
688 rtnh->rtnh_flags = 0;
689 rtnh->rtnh_hops = nh->weight;
690 rtnh->rtnh_ifindex = nh->iface->index;
95616c82 691
d14f8c3c 692 nl_add_nexthop(h, bufsize, nh, af);
95616c82 693
a1f5e514
OZ
694 if (nh->flags & RNF_ONLINK)
695 rtnh->rtnh_flags |= RTNH_F_ONLINK;
696
21f9acd2
OZ
697 /* Our KRT_REALM is per-route, but kernel RTA_FLOW is per-nexthop.
698 Therefore, we need to attach the same attribute to each nexthop. */
699 if (flow)
700 nl_add_attr_u32(h, bufsize, RTA_FLOW, flow->u.data);
701
9fdf9d29
OZ
702 nl_close_nexthop(h, rtnh);
703 }
704
705 nl_close_attr(h, a);
706}
95616c82 707
4e276a89 708static struct nexthop *
f5c8fb5f 709nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, const net_addr *n, struct rtattr *ra, int af, int krt_src)
95616c82 710{
ad276157 711 struct rtattr *a[BIRD_RTA_MAX];
95616c82 712 struct rtnexthop *nh = RTA_DATA(ra);
4e276a89 713 struct nexthop *rv, *first, **last;
3e236955 714 unsigned len = RTA_PAYLOAD(ra);
95616c82
OZ
715
716 first = NULL;
717 last = &first;
95616c82
OZ
718
719 while (len)
720 {
721 /* Use RTNH_OK(nh,len) ?? */
722 if ((len < sizeof(*nh)) || (len < nh->rtnh_len))
77d032c7 723 goto err;
95616c82 724
f5c8fb5f 725 if ((nh->rtnh_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD))
df83f626
OZ
726 goto next;
727
3e792350 728 *last = rv = lp_allocz(s->pool, NEXTHOP_MAX_SIZE);
95616c82
OZ
729 last = &(rv->next);
730
731 rv->weight = nh->rtnh_hops;
732 rv->iface = if_find_by_index(nh->rtnh_ifindex);
733 if (!rv->iface)
77d032c7
OZ
734 {
735 log(L_ERR "KRT: Received route %N with unknown ifindex %u", n, nh->rtnh_ifindex);
736 return NULL;
737 }
95616c82
OZ
738
739 /* Nonexistent RTNH_PAYLOAD ?? */
740 nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0);
98bb80a2
OZ
741 switch (af)
742 {
98bb80a2 743 case AF_INET:
4ff15a75 744 if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want4, a, sizeof(a)))
77d032c7 745 goto err;
98bb80a2 746 break;
4ff15a75 747
98bb80a2 748 case AF_INET6:
4ff15a75 749 if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want6, a, sizeof(a)))
77d032c7 750 goto err;
98bb80a2 751 break;
4ff15a75 752
f1b5f179
KY
753#ifdef HAVE_MPLS_KERNEL
754 case AF_MPLS:
755 if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want_mpls, a, sizeof(a)))
77d032c7 756 goto err;
f1b5f179
KY
757
758 if (a[RTA_NEWDST])
759 rv->labels = rta_get_mpls(a[RTA_NEWDST], rv->label);
760
761 break;
762#endif
763
98bb80a2 764 default:
77d032c7 765 goto err;
98bb80a2
OZ
766 }
767
95616c82 768 if (a[RTA_GATEWAY])
53401bef 769 rv->gw = rta_get_ipa(a[RTA_GATEWAY]);
95616c82 770
21f9acd2
OZ
771 if (a[RTA_FLOW])
772 s->rta_flow = rta_get_u32(a[RTA_FLOW]);
773
53401bef
OZ
774 if (a[RTA_VIA])
775 rv->gw = rta_get_via(a[RTA_VIA]);
53401bef 776
32425297
OZ
777 if (nh->rtnh_flags & RTNH_F_ONLINK)
778 rv->flags |= RNF_ONLINK;
779
53401bef
OZ
780 if (ipa_nonzero(rv->gw))
781 {
23c212e7 782 neighbor *nbr;
586c1800
OZ
783 nbr = neigh_find(&p->p, rv->gw, rv->iface,
784 (rv->flags & RNF_ONLINK) ? NEF_ONLINK : 0);
23c212e7 785 if (!nbr || (nbr->scope == SCOPE_HOST))
77d032c7
OZ
786 {
787 log(L_ERR "KRT: Received route %N with strange next-hop %I", n, rv->gw);
788 return NULL;
789 }
95616c82 790 }
62e64905 791
6b0f5f68 792#ifdef HAVE_MPLS_KERNEL
2eaf65ec 793 if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE])
6b0f5f68 794 {
77d032c7
OZ
795 if (rta_get_u16(a[RTA_ENCAP_TYPE]) != LWTUNNEL_ENCAP_MPLS)
796 {
797 log(L_WARN "KRT: Received route %N with unknown encapsulation method %d",
798 n, rta_get_u16(a[RTA_ENCAP_TYPE]));
6b0f5f68 799 return NULL;
d14f8c3c
JMM
800 }
801
6b0f5f68
MJM
802 struct rtattr *enca[BIRD_RTA_MAX];
803 nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
804 nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
805 rv->labels = rta_get_mpls(enca[RTA_DST], rv->label);
6b0f5f68
MJM
806 }
807#endif
808
df83f626 809 next:
95616c82
OZ
810 len -= NLMSG_ALIGN(nh->rtnh_len);
811 nh = RTNH_NEXT(nh);
812 }
813
59d3a361
OZ
814 /* Ensure nexthops are sorted to satisfy nest invariant */
815 if (!nexthop_is_sorted(first))
816 first = nexthop_sort(first);
817
95616c82 818 return first;
77d032c7
OZ
819
820err:
821 log(L_ERR "KRT: Received strange multipath route %N", n);
822 return NULL;
95616c82
OZ
823}
824
9fdf9d29
OZ
825static void
826nl_add_metrics(struct nlmsghdr *h, uint bufsize, u32 *metrics, int max)
827{
828 struct rtattr *a = nl_open_attr(h, bufsize, RTA_METRICS);
829 int t;
830
831 for (t = 1; t < max; t++)
832 if (metrics[0] & (1 << t))
833 nl_add_attr_u32(h, bufsize, t, metrics[t]);
834
835 nl_close_attr(h, a);
836}
837
838static int
839nl_parse_metrics(struct rtattr *hdr, u32 *metrics, int max)
840{
841 struct rtattr *a = RTA_DATA(hdr);
842 int len = RTA_PAYLOAD(hdr);
843
844 metrics[0] = 0;
845 for (; RTA_OK(a, len); a = RTA_NEXT(a, len))
846 {
847 if (a->rta_type == RTA_UNSPEC)
848 continue;
849
850 if (a->rta_type >= max)
851 continue;
852
853 if (RTA_PAYLOAD(a) != 4)
854 return -1;
855
856 metrics[0] |= 1 << a->rta_type;
acb04cfd 857 metrics[a->rta_type] = rta_get_u32(a);
9fdf9d29
OZ
858 }
859
860 if (len > 0)
861 return -1;
862
863 return 0;
864}
865
95616c82
OZ
866
867/*
868 * Scanning of interfaces
869 */
870
871static void
872nl_parse_link(struct nlmsghdr *h, int scan)
873{
874 struct ifinfomsg *i;
ad276157 875 struct rtattr *a[BIRD_IFLA_MAX];
95616c82
OZ
876 int new = h->nlmsg_type == RTM_NEWLINK;
877 struct iface f = {};
878 struct iface *ifi;
e3c0eca9 879 const char *name, *kind = NULL;
943478b0 880 u32 mtu, master = 0;
ae80a2de 881 uint fl;
95616c82 882
ad276157 883 if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFLA_RTA(i), ifla_attr_want, a, sizeof(a)))
95616c82 884 return;
ad276157 885 if (!a[IFLA_IFNAME] || (RTA_PAYLOAD(a[IFLA_IFNAME]) < 2) || !a[IFLA_MTU])
95616c82 886 {
ad276157
JMM
887 /*
888 * IFLA_IFNAME and IFLA_MTU are required, in fact, but there may also come
889 * a message with IFLA_WIRELESS set, where (e.g.) no IFLA_IFNAME exists.
890 * We simply ignore all such messages with IFLA_WIRELESS without notice.
891 */
892
893 if (a[IFLA_WIRELESS])
894 return;
895
896 log(L_ERR "KIF: Malformed message received");
95616c82
OZ
897 return;
898 }
ad276157 899
95616c82 900 name = RTA_DATA(a[IFLA_IFNAME]);
acb04cfd 901 mtu = rta_get_u32(a[IFLA_MTU]);
95616c82 902
943478b0
OZ
903 if (a[IFLA_MASTER])
904 master = rta_get_u32(a[IFLA_MASTER]);
905
e3c0eca9
OZ
906 if (a[IFLA_LINKINFO])
907 {
908 struct rtattr *li[BIRD_INFO_MAX];
909 nl_attr_len = RTA_PAYLOAD(a[IFLA_LINKINFO]);
910 nl_parse_attrs(RTA_DATA(a[IFLA_LINKINFO]), ifinfo_attr_want, li, sizeof(li));
911 if (li[IFLA_INFO_KIND])
912 kind = RTA_DATA(li[IFLA_INFO_KIND]);
913 }
914
95616c82
OZ
915 ifi = if_find_by_index(i->ifi_index);
916 if (!new)
917 {
918 DBG("KIF: IF%d(%s) goes down\n", i->ifi_index, name);
919 if (!ifi)
920 return;
921
922 if_delete(ifi);
923 }
924 else
925 {
926 DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i->ifi_index, name, mtu, i->ifi_flags);
927 if (ifi && strncmp(ifi->name, name, sizeof(ifi->name)-1))
928 if_delete(ifi);
929
930 strncpy(f.name, name, sizeof(f.name)-1);
931 f.index = i->ifi_index;
932 f.mtu = mtu;
933
943478b0
OZ
934 f.master_index = master;
935 f.master = if_find_by_index(master);
936
95616c82
OZ
937 fl = i->ifi_flags;
938 if (fl & IFF_UP)
939 f.flags |= IF_ADMIN_UP;
940 if (fl & IFF_LOWER_UP)
941 f.flags |= IF_LINK_UP;
942 if (fl & IFF_LOOPBACK) /* Loopback */
943 f.flags |= IF_MULTIACCESS | IF_LOOPBACK | IF_IGNORE;
944 else if (fl & IFF_POINTOPOINT) /* PtP */
945 f.flags |= IF_MULTICAST;
946 else if (fl & IFF_BROADCAST) /* Broadcast */
947 f.flags |= IF_MULTIACCESS | IF_BROADCAST | IF_MULTICAST;
948 else
949 f.flags |= IF_MULTIACCESS; /* NBMA */
3216eb03 950
16a3254c
OZ
951 if (fl & IFF_MULTICAST)
952 f.flags |= IF_MULTICAST;
953
e3c0eca9
OZ
954 if (kind && !strcmp(kind, "vrf"))
955 f.flags |= IF_VRF;
956
3216eb03
OZ
957 ifi = if_update(&f);
958
959 if (!scan)
960 if_end_partial_update(ifi);
95616c82
OZ
961 }
962}
963
964static void
9b136840 965nl_parse_addr4(struct ifaddrmsg *i, int scan, int new)
95616c82 966{
ad276157 967 struct rtattr *a[BIRD_IFA_MAX];
95616c82 968 struct iface *ifi;
e37d2e3e 969 u32 ifa_flags;
95616c82
OZ
970 int scope;
971
9b136840 972 if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a)))
95616c82 973 return;
ad276157 974
9b136840 975 if (!a[IFA_LOCAL])
ad276157 976 {
9b136840
JMM
977 log(L_ERR "KIF: Malformed message received (missing IFA_LOCAL)");
978 return;
ad276157 979 }
ad276157 980 if (!a[IFA_ADDRESS])
95616c82 981 {
ad276157 982 log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
95616c82
OZ
983 return;
984 }
985
986 ifi = if_find_by_index(i->ifa_index);
987 if (!ifi)
988 {
989 log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
990 return;
991 }
992
e37d2e3e
OZ
993 if (a[IFA_FLAGS])
994 ifa_flags = rta_get_u32(a[IFA_FLAGS]);
995 else
996 ifa_flags = i->ifa_flags;
997
9b136840 998 struct ifa ifa;
95616c82
OZ
999 bzero(&ifa, sizeof(ifa));
1000 ifa.iface = ifi;
cc5b93f7 1001 if (ifa_flags & IFA_F_SECONDARY)
95616c82
OZ
1002 ifa.flags |= IA_SECONDARY;
1003
9b136840
JMM
1004 ifa.ip = rta_get_ipa(a[IFA_LOCAL]);
1005
d7661fbe 1006 if (i->ifa_prefixlen > IP4_MAX_PREFIX_LENGTH)
95616c82
OZ
1007 {
1008 log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
1009 new = 0;
1010 }
d7661fbe 1011 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH)
95616c82 1012 {
9b136840
JMM
1013 ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
1014 net_fill_ip4(&ifa.prefix, rta_get_ip4(a[IFA_ADDRESS]), i->ifa_prefixlen);
95616c82
OZ
1015
1016 /* It is either a host address or a peer address */
9b136840 1017 if (ipa_equal(ifa.ip, ifa.brd))
95616c82
OZ
1018 ifa.flags |= IA_HOST;
1019 else
1020 {
1021 ifa.flags |= IA_PEER;
9b136840 1022 ifa.opposite = ifa.brd;
95616c82
OZ
1023 }
1024 }
1025 else
1026 {
9b136840
JMM
1027 net_fill_ip4(&ifa.prefix, ipa_to_ip4(ifa.ip), i->ifa_prefixlen);
1028 net_normalize(&ifa.prefix);
1029
d7661fbe 1030 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 1)
95616c82
OZ
1031 ifa.opposite = ipa_opposite_m1(ifa.ip);
1032
d7661fbe 1033 if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 2)
95616c82
OZ
1034 ifa.opposite = ipa_opposite_m2(ifa.ip);
1035
e2630a49
OZ
1036 if (ifi->flags & IF_BROADCAST)
1037 {
1038 /* If kernel offers us a broadcast address, we trust it */
1039 if (a[IFA_BROADCAST])
1040 ifa.brd = ipa_from_ip4(rta_get_ip4(a[IFA_BROADCAST]));
1041 /* Otherwise we create one (except for /31) */
1042 else if (i->ifa_prefixlen < (IP4_MAX_PREFIX_LENGTH - 1))
1043 ifa.brd = ipa_from_ip4(ip4_or(ipa_to_ip4(ifa.ip),
1044 ip4_not(ip4_mkmask(i->ifa_prefixlen))));
9b136840
JMM
1045 }
1046 }
1047
1048 scope = ipa_classify(ifa.ip);
1049 if (scope < 0)
1050 {
1051 log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
1052 return;
1053 }
1054 ifa.scope = scope & IADDR_SCOPE_MASK;
1055
1056 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
1057 ifi->index, ifi->name,
1058 new ? "added" : "removed",
4659b2ae 1059 ifa.ip, ifa.flags, &ifa.prefix, ifa.brd, ifa.opposite);
9b136840
JMM
1060
1061 if (new)
1062 ifa_update(&ifa);
1063 else
1064 ifa_delete(&ifa);
1065
1066 if (!scan)
1067 if_end_partial_update(ifi);
1068}
1069
1070static void
1071nl_parse_addr6(struct ifaddrmsg *i, int scan, int new)
1072{
1073 struct rtattr *a[BIRD_IFA_MAX];
1074 struct iface *ifi;
cc5b93f7 1075 u32 ifa_flags;
9b136840
JMM
1076 int scope;
1077
1078 if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a)))
1079 return;
1080
1081 if (!a[IFA_ADDRESS])
1082 {
1083 log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
1084 return;
1085 }
1086
1087 ifi = if_find_by_index(i->ifa_index);
1088 if (!ifi)
1089 {
1090 log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
1091 return;
1092 }
1093
cc5b93f7
OZ
1094 if (a[IFA_FLAGS])
1095 ifa_flags = rta_get_u32(a[IFA_FLAGS]);
1096 else
1097 ifa_flags = i->ifa_flags;
1098
9b136840
JMM
1099 struct ifa ifa;
1100 bzero(&ifa, sizeof(ifa));
1101 ifa.iface = ifi;
e37d2e3e 1102 if (ifa_flags & IFA_F_SECONDARY)
9b136840
JMM
1103 ifa.flags |= IA_SECONDARY;
1104
e37d2e3e
OZ
1105 /* Ignore tentative addresses silently */
1106 if (ifa_flags & IFA_F_TENTATIVE)
1107 return;
9b136840 1108
95616c82 1109 /* IFA_LOCAL can be unset for IPv6 interfaces */
9b136840
JMM
1110 ifa.ip = rta_get_ipa(a[IFA_LOCAL] ? : a[IFA_ADDRESS]);
1111
d7661fbe 1112 if (i->ifa_prefixlen > IP6_MAX_PREFIX_LENGTH)
9b136840
JMM
1113 {
1114 log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
1115 new = 0;
1116 }
d7661fbe 1117 if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH)
9b136840
JMM
1118 {
1119 ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
1120 net_fill_ip6(&ifa.prefix, rta_get_ip6(a[IFA_ADDRESS]), i->ifa_prefixlen);
1121
1122 /* It is either a host address or a peer address */
1123 if (ipa_equal(ifa.ip, ifa.brd))
1124 ifa.flags |= IA_HOST;
1125 else
1126 {
1127 ifa.flags |= IA_PEER;
1128 ifa.opposite = ifa.brd;
95616c82 1129 }
9b136840
JMM
1130 }
1131 else
1132 {
1133 net_fill_ip6(&ifa.prefix, ipa_to_ip6(ifa.ip), i->ifa_prefixlen);
1134 net_normalize(&ifa.prefix);
1135
d7661fbe 1136 if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH - 1)
9b136840 1137 ifa.opposite = ipa_opposite_m1(ifa.ip);
95616c82
OZ
1138 }
1139
1140 scope = ipa_classify(ifa.ip);
1141 if (scope < 0)
1142 {
1143 log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
1144 return;
1145 }
1146 ifa.scope = scope & IADDR_SCOPE_MASK;
1147
9b136840 1148 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
95616c82
OZ
1149 ifi->index, ifi->name,
1150 new ? "added" : "removed",
4659b2ae 1151 ifa.ip, ifa.flags, &ifa.prefix, ifa.brd, ifa.opposite);
3216eb03 1152
95616c82
OZ
1153 if (new)
1154 ifa_update(&ifa);
1155 else
1156 ifa_delete(&ifa);
3216eb03
OZ
1157
1158 if (!scan)
1159 if_end_partial_update(ifi);
95616c82
OZ
1160}
1161
9b136840
JMM
1162static void
1163nl_parse_addr(struct nlmsghdr *h, int scan)
1164{
1165 struct ifaddrmsg *i;
1166
1167 if (!(i = nl_checkin(h, sizeof(*i))))
1168 return;
1169
1170 int new = (h->nlmsg_type == RTM_NEWADDR);
1171
1172 switch (i->ifa_family)
1173 {
9b136840
JMM
1174 case AF_INET:
1175 return nl_parse_addr4(i, scan, new);
29a64162 1176
9b136840
JMM
1177 case AF_INET6:
1178 return nl_parse_addr6(i, scan, new);
9b136840
JMM
1179 }
1180}
1181
95616c82
OZ
1182void
1183kif_do_scan(struct kif_proto *p UNUSED)
1184{
1185 struct nlmsghdr *h;
1186
1187 if_start_update();
1188
e818f164 1189 nl_request_dump_link();
95616c82
OZ
1190 while (h = nl_get_scan())
1191 if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)
1192 nl_parse_link(h, 1);
1193 else
1194 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
29a64162 1195
943478b0
OZ
1196 /* Re-resolve master interface for slaves */
1197 struct iface *i;
1198 WALK_LIST(i, iface_list)
1199 if (i->master_index)
1200 {
1201 struct iface f = {
1202 .flags = i->flags,
1203 .mtu = i->mtu,
1204 .index = i->index,
1205 .master_index = i->master_index,
1206 .master = if_find_by_index(i->master_index)
1207 };
1208
1209 if (f.master != i->master)
1210 {
1211 memcpy(f.name, i->name, sizeof(f.name));
1212 if_update(&f);
1213 }
1214 }
1215
e818f164 1216 nl_request_dump_addr(AF_INET);
95616c82
OZ
1217 while (h = nl_get_scan())
1218 if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
3216eb03 1219 nl_parse_addr(h, 1);
95616c82
OZ
1220 else
1221 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
29a64162 1222
e818f164 1223 nl_request_dump_addr(AF_INET6);
d7661fbe
JMM
1224 while (h = nl_get_scan())
1225 if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
1226 nl_parse_addr(h, 1);
1227 else
1228 log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
29a64162 1229
95616c82
OZ
1230 if_end_update();
1231}
1232
1233/*
1234 * Routes
1235 */
1236
9ddbfbdd
JMM
1237static inline u32
1238krt_table_id(struct krt_proto *p)
1239{
1240 return KRT_CF->sys.table_id;
1241}
1242
1243static HASH(struct krt_proto) nl_table_map;
1244
29a64162
OZ
1245#define RTH_KEY(p) p->af, krt_table_id(p)
1246#define RTH_NEXT(p) p->sys.hash_next
1247#define RTH_EQ(a1,i1,a2,i2) a1 == a2 && i1 == i2
1248#define RTH_FN(a,i) a ^ u32_hash(i)
9ddbfbdd
JMM
1249
1250#define RTH_REHASH rth_rehash
1251#define RTH_PARAMS /8, *2, 2, 2, 6, 20
1252
1253HASH_DEFINE_REHASH_FN(RTH, struct krt_proto)
95616c82
OZ
1254
1255int
1256krt_capable(rte *e)
1257{
1258 rta *a = e->attrs;
1259
95616c82 1260 switch (a->dest)
62e64905 1261 {
4e276a89 1262 case RTD_UNICAST:
95616c82
OZ
1263 case RTD_BLACKHOLE:
1264 case RTD_UNREACHABLE:
1265 case RTD_PROHIBIT:
62e64905
OZ
1266 return 1;
1267
95616c82
OZ
1268 default:
1269 return 0;
62e64905 1270 }
95616c82
OZ
1271}
1272
1273static inline int
4e276a89 1274nh_bufsize(struct nexthop *nh)
95616c82
OZ
1275{
1276 int rv = 0;
1277 for (; nh != NULL; nh = nh->next)
9fdf9d29 1278 rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr)));
95616c82
OZ
1279 return rv;
1280}
1281
1282static int
722daa95 1283nl_send_route(struct krt_proto *p, rte *e, int op)
95616c82
OZ
1284{
1285 eattr *ea;
1286 net *net = e->net;
1287 rta *a = e->attrs;
13c0be19 1288 ea_list *eattrs = a->eattrs;
4e276a89 1289 int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(&(a->nh));
4adcb9df 1290 u32 priority = 0;
a8caff32 1291
95616c82
OZ
1292 struct {
1293 struct nlmsghdr h;
1294 struct rtmsg r;
a8caff32
JMM
1295 char buf[0];
1296 } *r;
1297
1298 int rsize = sizeof(*r) + bufsize;
1299 r = alloca(rsize);
95616c82 1300
cc5b93f7 1301 DBG("nl_send_route(%N,op=%x)\n", net->n.addr, op);
95616c82 1302
a8caff32
JMM
1303 bzero(&r->h, sizeof(r->h));
1304 bzero(&r->r, sizeof(r->r));
cc5b93f7 1305 r->h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE;
a8caff32 1306 r->h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
cc5b93f7 1307 r->h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK;
95616c82 1308
a8caff32
JMM
1309 r->r.rtm_family = p->af;
1310 r->r.rtm_dst_len = net_pxlen(net->n.addr);
1311 r->r.rtm_protocol = RTPROT_BIRD;
7074be22 1312 r->r.rtm_scope = RT_SCOPE_NOWHERE;
6b0f5f68 1313#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
1314 if (p->af == AF_MPLS)
1315 {
66acbc8d
OZ
1316 /*
1317 * Kernel MPLS code is a bit picky. We must:
1318 * 1) Always set RT_SCOPE_UNIVERSE and RTN_UNICAST (even for RTM_DELROUTE)
1319 * 2) Never use RTA_PRIORITY
1320 */
1321
d14f8c3c
JMM
1322 u32 label = net_mpls(net->n.addr);
1323 nl_add_attr_mpls(&r->h, rsize, RTA_DST, 1, &label);
66acbc8d
OZ
1324 r->r.rtm_scope = RT_SCOPE_UNIVERSE;
1325 r->r.rtm_type = RTN_UNICAST;
d14f8c3c
JMM
1326 }
1327 else
6b0f5f68 1328#endif
be17805c 1329 {
d14f8c3c 1330 nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(net->n.addr));
95616c82 1331
be17805c
OZ
1332 /* Add source address for IPv6 SADR routes */
1333 if (net->n.addr->type == NET_IP6_SADR)
1334 {
1335 net_addr_ip6_sadr *a = (void *) &net->n.addr;
1336 nl_add_attr_ip6(&r->h, rsize, RTA_SRC, a->src_prefix);
1337 r->r.rtm_src_len = a->src_pxlen;
1338 }
1339 }
1340
2feaa693
OZ
1341 /*
1342 * Strange behavior for RTM_DELROUTE:
1343 * 1) rtm_family is ignored in IPv6, works for IPv4
1344 * 2) not setting RTA_PRIORITY is different from setting default value (on IPv6)
1345 * 3) not setting RTA_PRIORITY is equivalent to setting 0, which is wildcard
1346 */
1347
9ddbfbdd 1348 if (krt_table_id(p) < 256)
a8caff32 1349 r->r.rtm_table = krt_table_id(p);
9ddbfbdd 1350 else
a8caff32 1351 nl_add_attr_u32(&r->h, rsize, RTA_TABLE, krt_table_id(p));
9ddbfbdd 1352
66acbc8d
OZ
1353 if (p->af == AF_MPLS)
1354 priority = 0;
4adcb9df
OZ
1355 else if (KRT_CF->sys.metric)
1356 priority = KRT_CF->sys.metric;
1357 else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC)))
1358 priority = ea->u.data;
78a2cc28 1359
4adcb9df 1360 if (priority)
d1b8fe93 1361 nl_add_attr_u32(&r->h, rsize, RTA_PRIORITY, priority);
78a2cc28 1362
2feaa693
OZ
1363 /* For route delete, we do not specify remaining route attributes */
1364 if (op == NL_OP_DELETE)
722daa95 1365 goto done;
78a2cc28 1366
6e75d0d2 1367 /* Default scope is LINK for device routes, UNIVERSE otherwise */
66acbc8d
OZ
1368 if (p->af == AF_MPLS)
1369 r->r.rtm_scope = RT_SCOPE_UNIVERSE;
1370 else if (ea = ea_find(eattrs, EA_KRT_SCOPE))
cc5b93f7 1371 r->r.rtm_scope = ea->u.data;
722daa95
OZ
1372 else if (a->dest == RTD_UNICAST && ipa_zero(a->nh.gw))
1373 r->r.rtm_scope = RT_SCOPE_LINK;
6e75d0d2 1374 else
722daa95 1375 r->r.rtm_scope = RT_SCOPE_UNIVERSE;
95616c82
OZ
1376
1377 if (ea = ea_find(eattrs, EA_KRT_PREFSRC))
a8caff32 1378 nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data);
95616c82
OZ
1379
1380 if (ea = ea_find(eattrs, EA_KRT_REALM))
a8caff32 1381 nl_add_attr_u32(&r->h, rsize, RTA_FLOW, ea->u.data);
95616c82 1382
9fdf9d29
OZ
1383
1384 u32 metrics[KRT_METRICS_MAX];
1385 metrics[0] = 0;
1386
1387 struct ea_walk_state ews = { .eattrs = eattrs };
1388 while (ea = ea_walk(&ews, EA_KRT_METRICS, KRT_METRICS_MAX))
1389 {
1390 int id = ea->id - EA_KRT_METRICS;
1391 metrics[0] |= 1 << id;
1392 metrics[id] = ea->u.data;
1393 }
1394
1395 if (metrics[0])
a8caff32 1396 nl_add_metrics(&r->h, rsize, metrics, KRT_METRICS_MAX);
9fdf9d29 1397
722daa95 1398 switch (a->dest)
95616c82 1399 {
4e276a89 1400 case RTD_UNICAST:
a8caff32 1401 r->r.rtm_type = RTN_UNICAST;
722daa95
OZ
1402 struct nexthop *nh = &(a->nh);
1403 if (nh->next)
21f9acd2 1404 nl_add_multipath(&r->h, rsize, nh, p->af, eattrs);
4e276a89
JMM
1405 else
1406 {
1407 nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->iface->index);
d14f8c3c 1408 nl_add_nexthop(&r->h, rsize, nh, p->af);
a1f5e514
OZ
1409
1410 if (nh->flags & RNF_ONLINK)
1411 r->r.rtm_flags |= RTNH_F_ONLINK;
4e276a89 1412 }
95616c82
OZ
1413 break;
1414 case RTD_BLACKHOLE:
a8caff32 1415 r->r.rtm_type = RTN_BLACKHOLE;
95616c82
OZ
1416 break;
1417 case RTD_UNREACHABLE:
a8caff32 1418 r->r.rtm_type = RTN_UNREACHABLE;
95616c82
OZ
1419 break;
1420 case RTD_PROHIBIT:
a8caff32 1421 r->r.rtm_type = RTN_PROHIBIT;
95616c82 1422 break;
2feaa693
OZ
1423 case RTD_NONE:
1424 break;
95616c82
OZ
1425 default:
1426 bug("krt_capable inconsistent with nl_send_route");
1427 }
1428
722daa95 1429done:
2feaa693 1430 /* Ignore missing for DELETE */
cc5b93f7 1431 return nl_exchange(&r->h, (op == NL_OP_DELETE));
2feaa693
OZ
1432}
1433
1434static inline int
ddb1bdf2 1435nl_allow_replace(struct krt_proto *p, rte *new)
2feaa693 1436{
ddb1bdf2
OZ
1437 /*
1438 * We use NL_OP_REPLACE for IPv4, it has an issue with not checking for
1439 * matching rtm_protocol, but that is OK when dedicated priority is used.
1440 *
1441 * For IPv6, the NL_OP_REPLACE is still broken even in Linux 4.19 LTS
1442 * (although it seems to be fixed in Linux 5.10 LTS) for sequence:
1443 *
1444 * ip route add 2001:db8::/32 via fe80::1 dev eth0
1445 * ip route replace 2001:db8::/32 dev eth0
1446 *
1447 * (it ends with two routes instead of replacing the first by the second one)
1448 *
1449 * Replacing with direct and special type (e.g. unreachable) routes does not
1450 * work, but replacing with regular routes work reliably
1451 */
2feaa693 1452
ddb1bdf2
OZ
1453 if (krt_ipv4(p))
1454 return 1;
95616c82 1455
ddb1bdf2
OZ
1456 rta *a = new->attrs;
1457 return (a->dest == RTD_UNICAST) && ipa_nonzero(a->nh.gw);
8235c474
OZ
1458}
1459
95616c82 1460void
cc75b3e1 1461krt_replace_rte(struct krt_proto *p, net *n UNUSED, rte *new, rte *old)
95616c82
OZ
1462{
1463 int err = 0;
1464
ddb1bdf2 1465 if (old && new && nl_allow_replace(p, new))
8235c474 1466 {
722daa95 1467 err = nl_send_route(p, new, NL_OP_REPLACE);
8235c474
OZ
1468 }
1469 else
1470 {
1471 if (old)
722daa95 1472 nl_send_route(p, old, NL_OP_DELETE);
95616c82 1473
8235c474 1474 if (new)
722daa95 1475 err = nl_send_route(p, new, NL_OP_ADD);
8235c474 1476 }
95616c82 1477
cc75b3e1
OZ
1478 if (new)
1479 {
1480 if (err < 0)
1481 bmap_clear(&p->sync_map, new->id);
1482 else
1483 bmap_set(&p->sync_map, new->id);
1484 }
95616c82
OZ
1485}
1486
2feaa693 1487
d0dd1d20
OZ
/*
 * Helpers for dropping a received route: emit a debug message and return
 * from the enclosing (void) function. SKIP additionally prints the local
 * variable dst of the enclosing function; SKIP0 is for places where dst
 * is not usable.
 */
#define SKIP0(ARG, ...)							\
  do {									\
    DBG("KRT: Ignoring route - " ARG, ##__VA_ARGS__);			\
    return;								\
  } while(0)

#define SKIP(ARG, ...)							\
  do {									\
    DBG("KRT: Ignoring route %N - " ARG, &dst, ##__VA_ARGS__);		\
    return;								\
  } while(0)
95616c82
OZ
1490
1491static void
2feaa693 1492nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
95616c82
OZ
1493{
1494 struct krt_proto *p;
1495 struct rtmsg *i;
ad276157 1496 struct rtattr *a[BIRD_RTA_MAX];
95616c82
OZ
1497 int new = h->nlmsg_type == RTM_NEWROUTE;
1498
be17805c 1499 net_addr dst, src = {};
95616c82 1500 u32 oif = ~0;
29a64162 1501 u32 table_id;
2feaa693 1502 u32 priority = 0;
6e75d0d2 1503 u32 def_scope = RT_SCOPE_UNIVERSE;
be17805c 1504 int krt_src;
95616c82 1505
ad276157 1506 if (!(i = nl_checkin(h, sizeof(*i))))
95616c82 1507 return;
ad276157
JMM
1508
1509 switch (i->rtm_family)
95616c82 1510 {
29a64162
OZ
1511 case AF_INET:
1512 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want4, a, sizeof(a)))
1513 return;
1514
1515 if (a[RTA_DST])
1516 net_fill_ip4(&dst, rta_get_ip4(a[RTA_DST]), i->rtm_dst_len);
1517 else
1518 net_fill_ip4(&dst, IP4_NONE, 0);
1519 break;
1520
cc5b93f7
OZ
1521 case AF_INET6:
1522 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a)))
1523 return;
29a64162
OZ
1524
1525 if (a[RTA_DST])
1526 net_fill_ip6(&dst, rta_get_ip6(a[RTA_DST]), i->rtm_dst_len);
1527 else
1528 net_fill_ip6(&dst, IP6_NONE, 0);
be17805c
OZ
1529
1530 if (a[RTA_SRC])
1531 net_fill_ip6(&src, rta_get_ip6(a[RTA_SRC]), i->rtm_src_len);
1532 else
1533 net_fill_ip6(&src, IP6_NONE, 0);
29a64162
OZ
1534 break;
1535
6b0f5f68 1536#ifdef HAVE_MPLS_KERNEL
d14f8c3c
JMM
1537 case AF_MPLS:
1538 if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want_mpls, a, sizeof(a)))
1539 return;
1540
ed610044 1541 if (!a[RTA_DST])
d0dd1d20 1542 SKIP0("MPLS route without RTA_DST\n");
ed610044
OZ
1543
1544 if (rta_get_mpls(a[RTA_DST], rta_mpls_stack) != 1)
d0dd1d20 1545 SKIP0("MPLS route with multi-label RTA_DST\n");
ed610044
OZ
1546
1547 net_fill_mpls(&dst, rta_mpls_stack[0]);
d14f8c3c 1548 break;
6b0f5f68 1549#endif
d14f8c3c 1550
29a64162
OZ
1551 default:
1552 return;
95616c82
OZ
1553 }
1554
95616c82 1555 if (a[RTA_OIF])
acb04cfd 1556 oif = rta_get_u32(a[RTA_OIF]);
95616c82 1557
9ddbfbdd 1558 if (a[RTA_TABLE])
29a64162 1559 table_id = rta_get_u32(a[RTA_TABLE]);
9ddbfbdd 1560 else
29a64162 1561 table_id = i->rtm_table;
9ddbfbdd 1562
d0dd1d20
OZ
1563 if (i->rtm_flags & RTM_F_CLONED)
1564 SKIP("cloned\n");
1565
29a64162
OZ
1566 /* Do we know this table? */
1567 p = HASH_FIND(nl_table_map, RTH, i->rtm_family, table_id);
95616c82 1568 if (!p)
4659b2ae 1569 SKIP("unknown table %u\n", table_id);
95616c82 1570
be17805c
OZ
1571 if (a[RTA_SRC] && (p->p.net_type != NET_IP6_SADR))
1572 SKIP("src prefix for non-SADR channel\n");
1573
95616c82
OZ
1574 if (a[RTA_IIF])
1575 SKIP("IIF set\n");
29a64162 1576
95616c82
OZ
1577 if (i->rtm_tos != 0) /* We don't support TOS */
1578 SKIP("TOS %02x\n", i->rtm_tos);
95616c82 1579
2feaa693 1580 if (s->scan && !new)
95616c82
OZ
1581 SKIP("RTM_DELROUTE in scan\n");
1582
2feaa693
OZ
1583 if (a[RTA_PRIORITY])
1584 priority = rta_get_u32(a[RTA_PRIORITY]);
1585
9b136840 1586 int c = net_classify(&dst);
95616c82
OZ
1587 if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
1588 SKIP("strange class/scope\n");
1589
95616c82
OZ
1590 switch (i->rtm_protocol)
1591 {
1592 case RTPROT_UNSPEC:
1593 SKIP("proto unspec\n");
1594
1595 case RTPROT_REDIRECT:
be17805c 1596 krt_src = KRT_SRC_REDIRECT;
95616c82
OZ
1597 break;
1598
1599 case RTPROT_KERNEL:
be17805c 1600 krt_src = KRT_SRC_KERNEL;
e83beb70 1601 break;
95616c82
OZ
1602
1603 case RTPROT_BIRD:
2feaa693 1604 if (!s->scan)
95616c82 1605 SKIP("echo\n");
be17805c 1606 krt_src = KRT_SRC_BIRD;
95616c82
OZ
1607 break;
1608
1609 case RTPROT_BOOT:
1610 default:
be17805c 1611 krt_src = KRT_SRC_ALIEN;
95616c82
OZ
1612 }
1613
be17805c
OZ
1614 net_addr *n = &dst;
1615 if (p->p.net_type == NET_IP6_SADR)
1616 {
1617 n = alloca(sizeof(net_addr_ip6_sadr));
1618 net_fill_ip6_sadr(n, net6_prefix(&dst), net6_pxlen(&dst),
1619 net6_prefix(&src), net6_pxlen(&src));
1620 }
1621
1622 net *net = net_get(p->p.main_channel->table, n);
95616c82 1623
d14f8c3c 1624 rta *ra = lp_allocz(s->pool, RTA_MAX_SIZE);
2feaa693
OZ
1625 ra->source = RTS_INHERIT;
1626 ra->scope = SCOPE_UNIVERSE;
95616c82 1627
082905a8
OZ
1628 {
1629 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + 2 * sizeof(eattr));
1630 *ea = (ea_list) { .flags = EALF_SORTED, .count = 2 };
1631 ea->next = ra->eattrs;
1632 ra->eattrs = ea;
1633
1634 ea->attrs[0] = (eattr) {
1635 .id = EA_KRT_SOURCE,
1636 .type = EAF_TYPE_INT,
1637 .u.data = i->rtm_protocol
1638 };
1639
1640 ea->attrs[1] = (eattr) {
1641 .id = EA_KRT_METRIC,
1642 .type = EAF_TYPE_INT,
1643 .u.data = priority,
1644 };
1645 }
1646
21f9acd2
OZ
1647 if (a[RTA_FLOW])
1648 s->rta_flow = rta_get_u32(a[RTA_FLOW]);
1649 else
1650 s->rta_flow = 0;
1651
95616c82
OZ
1652 switch (i->rtm_type)
1653 {
1654 case RTN_UNICAST:
62e64905 1655 ra->dest = RTD_UNICAST;
95616c82 1656
98bb80a2 1657 if (a[RTA_MULTIPATH])
4ff15a75 1658 {
f5c8fb5f 1659 struct nexthop *nh = nl_parse_multipath(s, p, n, a[RTA_MULTIPATH], i->rtm_family, krt_src);
4e276a89 1660 if (!nh)
77d032c7 1661 SKIP("strange RTA_MULTIPATH\n");
9fdf9d29 1662
2eaf65ec 1663 nexthop_link(ra, nh);
95616c82
OZ
1664 break;
1665 }
1666
f5c8fb5f 1667 if ((i->rtm_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD))
77d032c7 1668 SKIP("ignore RTNH_F_DEAD\n");
df83f626 1669
4e276a89
JMM
1670 ra->nh.iface = if_find_by_index(oif);
1671 if (!ra->nh.iface)
95616c82 1672 {
fe9f1a6d 1673 log(L_ERR "KRT: Received route %N with unknown ifindex %u", net->n.addr, oif);
95616c82
OZ
1674 return;
1675 }
1676
53401bef
OZ
1677 if (a[RTA_GATEWAY])
1678 ra->nh.gw = rta_get_ipa(a[RTA_GATEWAY]);
1679
53401bef
OZ
1680 if (a[RTA_VIA])
1681 ra->nh.gw = rta_get_via(a[RTA_VIA]);
95616c82 1682
32425297
OZ
1683 if (i->rtm_flags & RTNH_F_ONLINK)
1684 ra->nh.flags |= RNF_ONLINK;
1685
53401bef
OZ
1686 if (ipa_nonzero(ra->nh.gw))
1687 {
95616c82 1688 /* Silently skip strange 6to4 routes */
0bf95f99 1689 const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96);
4e276a89 1690 if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->nh.gw, (net_addr *) &sit))
95616c82
OZ
1691 return;
1692
23c212e7 1693 neighbor *nbr;
586c1800
OZ
1694 nbr = neigh_find(&p->p, ra->nh.gw, ra->nh.iface,
1695 (ra->nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0);
23c212e7 1696 if (!nbr || (nbr->scope == SCOPE_HOST))
95616c82 1697 {
4e276a89
JMM
1698 log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr,
1699 ra->nh.gw);
95616c82
OZ
1700 return;
1701 }
1702 }
95616c82
OZ
1703
1704 break;
1705 case RTN_BLACKHOLE:
2feaa693 1706 ra->dest = RTD_BLACKHOLE;
95616c82
OZ
1707 break;
1708 case RTN_UNREACHABLE:
2feaa693 1709 ra->dest = RTD_UNREACHABLE;
95616c82
OZ
1710 break;
1711 case RTN_PROHIBIT:
2feaa693 1712 ra->dest = RTD_PROHIBIT;
95616c82
OZ
1713 break;
1714 /* FIXME: What about RTN_THROW? */
1715 default:
1716 SKIP("type %d\n", i->rtm_type);
1717 return;
1718 }
1719
6b0f5f68 1720#ifdef HAVE_MPLS_KERNEL
d14f8c3c 1721 if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !ra->nh.next)
2eaf65ec 1722 ra->nh.labels = rta_get_mpls(a[RTA_NEWDST], ra->nh.label);
d14f8c3c
JMM
1723
1724 if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !ra->nh.next)
1725 {
1726 switch (rta_get_u16(a[RTA_ENCAP_TYPE]))
1727 {
1728 case LWTUNNEL_ENCAP_MPLS:
1729 {
1730 struct rtattr *enca[BIRD_RTA_MAX];
1731 nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
1732 nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
2eaf65ec 1733 ra->nh.labels = rta_get_mpls(enca[RTA_DST], ra->nh.label);
d14f8c3c
JMM
1734 break;
1735 }
1736 default:
1737 SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE]));
1738 break;
1739 }
1740 }
6b0f5f68 1741#endif
d14f8c3c 1742
6e75d0d2
OZ
1743 if (i->rtm_scope != def_scope)
1744 {
1745 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1746 ea->next = ra->eattrs;
1747 ra->eattrs = ea;
1748 ea->flags = EALF_SORTED;
1749 ea->count = 1;
16ac6c3c
MM
1750 ea->attrs[0] = (eattr) {
1751 .id = EA_KRT_SCOPE,
1752 .flags = 0,
1753 .type = EAF_TYPE_INT,
1754 .u.data = i->rtm_scope,
1755 };
6e75d0d2 1756 }
95616c82
OZ
1757
1758 if (a[RTA_PREFSRC])
1759 {
9b136840 1760 ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]);
95616c82 1761
16ac6c3c
MM
1762 struct adata *ad = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps));
1763 ad->length = sizeof(ps);
1764 memcpy(ad->data, &ps, sizeof(ps));
1765
2feaa693
OZ
1766 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1767 ea->next = ra->eattrs;
1768 ra->eattrs = ea;
95616c82
OZ
1769 ea->flags = EALF_SORTED;
1770 ea->count = 1;
16ac6c3c
MM
1771 ea->attrs[0] = (eattr) {
1772 .id = EA_KRT_PREFSRC,
1773 .flags = 0,
1774 .type = EAF_TYPE_IP_ADDRESS,
1775 .u.ptr = ad,
1776 };
95616c82
OZ
1777 }
1778
21f9acd2
OZ
1779 /* Can be set per-route or per-nexthop */
1780 if (s->rta_flow)
95616c82 1781 {
2feaa693
OZ
1782 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1783 ea->next = ra->eattrs;
1784 ra->eattrs = ea;
95616c82
OZ
1785 ea->flags = EALF_SORTED;
1786 ea->count = 1;
16ac6c3c
MM
1787 ea->attrs[0] = (eattr) {
1788 .id = EA_KRT_REALM,
1789 .flags = 0,
1790 .type = EAF_TYPE_INT,
1791 .u.data = s->rta_flow,
1792 };
95616c82
OZ
1793 }
1794
9fdf9d29
OZ
1795 if (a[RTA_METRICS])
1796 {
1797 u32 metrics[KRT_METRICS_MAX];
2feaa693 1798 ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr));
9fdf9d29
OZ
1799 int t, n = 0;
1800
1801 if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0)
1802 {
fe9f1a6d 1803 log(L_ERR "KRT: Received route %N with strange RTA_METRICS attribute", net->n.addr);
9fdf9d29
OZ
1804 return;
1805 }
1806
1807 for (t = 1; t < KRT_METRICS_MAX; t++)
1808 if (metrics[0] & (1 << t))
16ac6c3c
MM
1809 ea->attrs[n++] = (eattr) {
1810 .id = EA_CODE(PROTOCOL_KERNEL, KRT_METRICS_OFFSET + t),
1811 .flags = 0,
1812 .type = EAF_TYPE_INT, /* FIXME: Some are EAF_TYPE_BITFIELD */
1813 .u.data = metrics[t],
1814 };
9fdf9d29
OZ
1815
1816 if (n > 0)
1817 {
2feaa693 1818 ea->next = ra->eattrs;
9fdf9d29
OZ
1819 ea->flags = EALF_SORTED;
1820 ea->count = n;
2feaa693 1821 ra->eattrs = ea;
9fdf9d29
OZ
1822 }
1823 }
1824
082905a8 1825 rte *e = rte_get_temp(ra, p->p.main_source);
722daa95 1826 e->net = net;
2feaa693 1827
722daa95 1828 if (s->scan)
082905a8 1829 krt_got_route(p, e, krt_src);
95616c82 1830 else
082905a8 1831 krt_got_route_async(p, e, new, krt_src);
2feaa693 1832
722daa95 1833 lp_flush(s->pool);
95616c82
OZ
1834}
1835
1836void
534d0a4b 1837krt_do_scan(struct krt_proto *p)
95616c82 1838{
722daa95
OZ
1839 struct nl_parse_state s = {
1840 .proto = p,
1841 .pool = nl_linpool,
1842 .scan = 1,
1843 };
534d0a4b
OZ
1844
1845 /* Table-specific scan or shared scan */
1846 if (p)
1847 nl_request_dump_route(p->af, krt_table_id(p));
1848 else
1849 nl_request_dump_route(AF_UNSPEC, 0);
95616c82 1850
722daa95 1851 struct nlmsghdr *h;
95616c82 1852 while (h = nl_get_scan())
534d0a4b 1853 {
95616c82 1854 if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
2feaa693 1855 nl_parse_route(&s, h);
95616c82
OZ
1856 else
1857 log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
534d0a4b 1858 }
95616c82
OZ
1859}
1860
1861/*
1862 * Asynchronous Netlink interface
1863 */
1864
1865static sock *nl_async_sk; /* BIRD socket for asynchronous notifications */
1866static byte *nl_async_rx_buffer; /* Receive buffer */
81ee6cda
OZ
1867static uint nl_async_bufsize; /* Kernel rx buffer size for the netlink socket */
1868static struct config *nl_last_config; /* For tracking changes to nl_async_bufsize */
95616c82
OZ
1869
1870static void
1871nl_async_msg(struct nlmsghdr *h)
1872{
722daa95
OZ
1873 struct nl_parse_state s = {
1874 .proto = NULL,
1875 .pool = nl_linpool,
1876 .scan = 0,
1877 };
2feaa693 1878
95616c82
OZ
1879 switch (h->nlmsg_type)
1880 {
1881 case RTM_NEWROUTE:
1882 case RTM_DELROUTE:
1883 DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);
2feaa693 1884 nl_parse_route(&s, h);
95616c82
OZ
1885 break;
1886 case RTM_NEWLINK:
1887 case RTM_DELLINK:
1888 DBG("KRT: Received async link notification (%d)\n", h->nlmsg_type);
1e4891e4
OZ
1889 if (kif_proto)
1890 nl_parse_link(h, 0);
95616c82
OZ
1891 break;
1892 case RTM_NEWADDR:
1893 case RTM_DELADDR:
1894 DBG("KRT: Received async address notification (%d)\n", h->nlmsg_type);
1e4891e4
OZ
1895 if (kif_proto)
1896 nl_parse_addr(h, 0);
95616c82
OZ
1897 break;
1898 default:
1899 DBG("KRT: Received unknown async notification (%d)\n", h->nlmsg_type);
1900 }
1901}
1902
1903static int
3e236955 1904nl_async_hook(sock *sk, uint size UNUSED)
95616c82
OZ
1905{
1906 struct iovec iov = { nl_async_rx_buffer, NL_RX_SIZE };
1907 struct sockaddr_nl sa;
31e9e101
ST
1908 struct msghdr m = {
1909 .msg_name = &sa,
1910 .msg_namelen = sizeof(sa),
1911 .msg_iov = &iov,
1912 .msg_iovlen = 1,
1913 };
95616c82
OZ
1914 struct nlmsghdr *h;
1915 int x;
ae80a2de 1916 uint len;
95616c82
OZ
1917
1918 x = recvmsg(sk->fd, &m, 0);
1919 if (x < 0)
1920 {
1921 if (errno == ENOBUFS)
1922 {
1923 /*
1924 * Netlink reports some packets have been thrown away.
1925 * One day we might react to it by asking for route table
1926 * scan in near future.
1927 */
2c33da50 1928 log(L_WARN "Kernel dropped some netlink messages, will resync on next scan.");
95616c82
OZ
1929 return 1; /* More data are likely to be ready */
1930 }
1931 else if (errno != EWOULDBLOCK)
1932 log(L_ERR "Netlink recvmsg: %m");
1933 return 0;
1934 }
1935 if (sa.nl_pid) /* It isn't from the kernel */
1936 {
1937 DBG("Non-kernel packet\n");
1938 return 1;
1939 }
1940 h = (void *) nl_async_rx_buffer;
1941 len = x;
1942 if (m.msg_flags & MSG_TRUNC)
1943 {
1944 log(L_WARN "Netlink got truncated asynchronous message");
1945 return 1;
1946 }
1947 while (NLMSG_OK(h, len))
1948 {
1949 nl_async_msg(h);
1950 h = NLMSG_NEXT(h, len);
1951 }
1952 if (len)
1953 log(L_WARN "nl_async_hook: Found packet remnant of size %d", len);
1954 return 1;
1955}
1956
ccd2a3ed
JMM
1957static void
1958nl_async_err_hook(sock *sk, int e UNUSED)
1959{
1960 nl_async_hook(sk, 0);
1961}
1962
95616c82
OZ
1963static void
1964nl_open_async(void)
1965{
1966 sock *sk;
1967 struct sockaddr_nl sa;
1968 int fd;
95616c82 1969
f83ce94d 1970 if (nl_async_sk)
95616c82 1971 return;
95616c82
OZ
1972
1973 DBG("KRT: Opening async netlink socket\n");
1974
1975 fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
1976 if (fd < 0)
1977 {
1978 log(L_ERR "Unable to open asynchronous rtnetlink socket: %m");
1979 return;
1980 }
1981
1982 bzero(&sa, sizeof(sa));
1983 sa.nl_family = AF_NETLINK;
29a64162
OZ
1984 sa.nl_groups = RTMGRP_LINK |
1985 RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE |
1986 RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE;
1987
95616c82
OZ
1988 if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
1989 {
1990 log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m");
f83ce94d 1991 close(fd);
95616c82
OZ
1992 return;
1993 }
1994
f83ce94d
OZ
1995 nl_async_rx_buffer = xmalloc(NL_RX_SIZE);
1996
95616c82
OZ
1997 sk = nl_async_sk = sk_new(krt_pool);
1998 sk->type = SK_MAGIC;
1999 sk->rx_hook = nl_async_hook;
ccd2a3ed 2000 sk->err_hook = nl_async_err_hook;
95616c82 2001 sk->fd = fd;
05476c4d 2002 if (sk_open(sk) < 0)
95616c82 2003 bug("Netlink: sk_open failed");
95616c82
OZ
2004}
2005
81ee6cda
OZ
2006static void
2007nl_update_async_bufsize(void)
2008{
2009 /* No async socket */
2010 if (!nl_async_sk)
2011 return;
2012
2013 /* Already reconfigured */
2014 if (nl_last_config == config)
2015 return;
2016
2017 /* Update netlink buffer size */
2018 uint bufsize = nl_cfg_rx_buffer_size(config);
2019 if (bufsize && (bufsize != nl_async_bufsize))
2020 {
2021 /* Log message for reconfigurations only */
2022 if (nl_last_config)
2023 log(L_INFO "KRT: Changing netlink rx buffer size to %u", bufsize);
2024
2025 nl_set_rcvbuf(nl_async_sk->fd, bufsize);
2026 nl_async_bufsize = bufsize;
2027 }
2028
2029 nl_last_config = config;
2030}
2031
9ddbfbdd 2032
95616c82
OZ
2033/*
2034 * Interface to the UNIX krt module
2035 */
2036
95616c82 2037void
9ddbfbdd
JMM
2038krt_sys_io_init(void)
2039{
05d47bd5 2040 nl_linpool = lp_new_default(krt_pool);
9ddbfbdd
JMM
2041 HASH_INIT(nl_table_map, krt_pool, 6);
2042}
2043
2044int
c6964c30 2045krt_sys_start(struct krt_proto *p)
95616c82 2046{
29a64162 2047 struct krt_proto *old = HASH_FIND(nl_table_map, RTH, p->af, krt_table_id(p));
9ddbfbdd
JMM
2048
2049 if (old)
2050 {
2051 log(L_ERR "%s: Kernel table %u already registered by %s",
2052 p->p.name, krt_table_id(p), old->p.name);
2053 return 0;
2054 }
2055
2056 HASH_INSERT2(nl_table_map, RTH, krt_pool, p);
c6964c30
OZ
2057
2058 nl_open();
2059 nl_open_async();
81ee6cda 2060 nl_update_async_bufsize();
9ddbfbdd
JMM
2061
2062 return 1;
95616c82
OZ
2063}
2064
2065void
9ddbfbdd 2066krt_sys_shutdown(struct krt_proto *p)
95616c82 2067{
81ee6cda
OZ
2068 nl_update_async_bufsize();
2069
9ddbfbdd 2070 HASH_REMOVE2(nl_table_map, RTH, krt_pool, p);
95616c82
OZ
2071}
2072
2073int
2074krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
2075{
81ee6cda
OZ
2076 nl_update_async_bufsize();
2077
4adcb9df 2078 return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric);
95616c82
OZ
2079}
2080
95616c82
OZ
2081void
2082krt_sys_init_config(struct krt_config *cf)
2083{
2084 cf->sys.table_id = RT_TABLE_MAIN;
bff21441 2085 cf->sys.metric = 32;
95616c82
OZ
2086}
2087
2088void
2089krt_sys_copy_config(struct krt_config *d, struct krt_config *s)
2090{
2091 d->sys.table_id = s->sys.table_id;
4adcb9df 2092 d->sys.metric = s->sys.metric;
95616c82
OZ
2093}
2094
9fdf9d29
OZ
2095static const char *krt_metrics_names[KRT_METRICS_MAX] = {
2096 NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss",
2097 "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack"
2098};
2099
2100static const char *krt_features_names[KRT_FEATURES_MAX] = {
2101 "ecn", NULL, NULL, "allfrag"
2102};
2103
2104int
258be565 2105krt_sys_get_attr(const eattr *a, byte *buf, int buflen UNUSED)
9fdf9d29
OZ
2106{
2107 switch (a->id)
2108 {
2109 case EA_KRT_PREFSRC:
2110 bsprintf(buf, "prefsrc");
2111 return GA_NAME;
2112
2113 case EA_KRT_REALM:
2114 bsprintf(buf, "realm");
2115 return GA_NAME;
2116
6e75d0d2
OZ
2117 case EA_KRT_SCOPE:
2118 bsprintf(buf, "scope");
2119 return GA_NAME;
2120
9fdf9d29
OZ
2121 case EA_KRT_LOCK:
2122 buf += bsprintf(buf, "lock:");
2123 ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX);
2124 return GA_FULL;
2125
2126 case EA_KRT_FEATURES:
2127 buf += bsprintf(buf, "features:");
2128 ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX);
2129 return GA_FULL;
2130
2131 default:;
2132 int id = (int)EA_ID(a->id) - KRT_METRICS_OFFSET;
2133 if (id > 0 && id < KRT_METRICS_MAX)
2134 {
2135 bsprintf(buf, "%s", krt_metrics_names[id]);
2136 return GA_NAME;
2137 }
2138
2139 return GA_UNKNOWN;
2140 }
2141}
2142
95616c82
OZ
2143
2144
2145void
2146kif_sys_start(struct kif_proto *p UNUSED)
2147{
2148 nl_open();
2149 nl_open_async();
2150}
2151
2152void
2153kif_sys_shutdown(struct kif_proto *p UNUSED)
2154{
2155}
153f02da
OZ
2156
2157int
2158kif_update_sysdep_addr(struct iface *i UNUSED)
2159{
2160 return 0;
2161}