]> git.ipfire.org Git - people/arne_f/kernel.git/blame - net/ipv6/route.c
ipv6: recursive check rt->dst.from when call rt6_check_expired
[people/arne_f/kernel.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
8d71740c 58#include <net/netevent.h>
21713ebc 59#include <net/netlink.h>
1da177e4
LT
60
61#include <asm/uaccess.h>
62
63#ifdef CONFIG_SYSCTL
64#include <linux/sysctl.h>
65#endif
66
1716a961 67static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 68 const struct in6_addr *dest);
1da177e4 69static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 70static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 71static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
72static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73static void ip6_dst_destroy(struct dst_entry *);
74static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
569d3645 76static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
77
78static int ip6_pkt_discard(struct sk_buff *skb);
79static int ip6_pkt_discard_out(struct sk_buff *skb);
80static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
81static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
82 struct sk_buff *skb, u32 mtu);
83static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
84 struct sk_buff *skb);
1da177e4 85
70ceb4f5 86#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 87static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
88 const struct in6_addr *prefix, int prefixlen,
89 const struct in6_addr *gwaddr, int ifindex,
95c96174 90 unsigned int pref);
efa2cea0 91static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
92 const struct in6_addr *prefix, int prefixlen,
93 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
94#endif
95
06582540
DM
96static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
97{
98 struct rt6_info *rt = (struct rt6_info *) dst;
99 struct inet_peer *peer;
100 u32 *p = NULL;
101
8e2ec639
YZ
102 if (!(rt->dst.flags & DST_HOST))
103 return NULL;
104
fbfe95a4 105 peer = rt6_get_peer_create(rt);
06582540
DM
106 if (peer) {
107 u32 *old_p = __DST_METRICS_PTR(old);
108 unsigned long prev, new;
109
110 p = peer->metrics;
111 if (inet_metrics_new(peer))
112 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
113
114 new = (unsigned long) p;
115 prev = cmpxchg(&dst->_metrics, old, new);
116
117 if (prev != old) {
118 p = __DST_METRICS_PTR(prev);
119 if (prev & DST_METRICS_READ_ONLY)
120 p = NULL;
121 }
122 }
123 return p;
124}
125
f894cbf8
DM
126static inline const void *choose_neigh_daddr(struct rt6_info *rt,
127 struct sk_buff *skb,
128 const void *daddr)
39232973
DM
129{
130 struct in6_addr *p = &rt->rt6i_gateway;
131
a7563f34 132 if (!ipv6_addr_any(p))
39232973 133 return (const void *) p;
f894cbf8
DM
134 else if (skb)
135 return &ipv6_hdr(skb)->daddr;
39232973
DM
136 return daddr;
137}
138
f894cbf8
DM
139static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
140 struct sk_buff *skb,
141 const void *daddr)
d3aaeb38 142{
39232973
DM
143 struct rt6_info *rt = (struct rt6_info *) dst;
144 struct neighbour *n;
145
f894cbf8 146 daddr = choose_neigh_daddr(rt, skb, daddr);
39232973 147 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
f83c7790
DM
148 if (n)
149 return n;
150 return neigh_create(&nd_tbl, daddr, dst->dev);
151}
152
8ade06c6 153static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
f83c7790 154{
8ade06c6
DM
155 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
156 if (!n) {
157 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
158 if (IS_ERR(n))
159 return PTR_ERR(n);
160 }
97cac082 161 rt->n = n;
f83c7790
DM
162
163 return 0;
d3aaeb38
DM
164}
165
9a7ec3a9 166static struct dst_ops ip6_dst_ops_template = {
1da177e4 167 .family = AF_INET6,
09640e63 168 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
169 .gc = ip6_dst_gc,
170 .gc_thresh = 1024,
171 .check = ip6_dst_check,
0dbaee3b 172 .default_advmss = ip6_default_advmss,
ebb762f2 173 .mtu = ip6_mtu,
06582540 174 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
175 .destroy = ip6_dst_destroy,
176 .ifdown = ip6_dst_ifdown,
177 .negative_advice = ip6_negative_advice,
178 .link_failure = ip6_link_failure,
179 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 180 .redirect = rt6_do_redirect,
1ac06e03 181 .local_out = __ip6_local_out,
d3aaeb38 182 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
183};
184
ebb762f2 185static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 186{
618f9bc7
SK
187 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
188
189 return mtu ? : dst->dev->mtu;
ec831ea7
RD
190}
191
6700c270
DM
192static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
193 struct sk_buff *skb, u32 mtu)
14e50e57
DM
194{
195}
196
6700c270
DM
/* dst_ops->redirect callback for blackhole routes: intentionally a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
}
201
0972ddb2
HB
202static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
203 unsigned long old)
204{
205 return NULL;
206}
207
14e50e57
DM
208static struct dst_ops ip6_dst_blackhole_ops = {
209 .family = AF_INET6,
09640e63 210 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
211 .destroy = ip6_dst_destroy,
212 .check = ip6_dst_check,
ebb762f2 213 .mtu = ip6_blackhole_mtu,
214f45c9 214 .default_advmss = ip6_default_advmss,
14e50e57 215 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 216 .redirect = ip6_rt_blackhole_redirect,
0972ddb2 217 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 218 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
219};
220
62fa8a84
DM
221static const u32 ip6_template_metrics[RTAX_MAX] = {
222 [RTAX_HOPLIMIT - 1] = 255,
223};
224
fb0af4c7 225static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
226 .dst = {
227 .__refcnt = ATOMIC_INIT(1),
228 .__use = 1,
229 .obsolete = -1,
230 .error = -ENETUNREACH,
d8d1f30b
CG
231 .input = ip6_pkt_discard,
232 .output = ip6_pkt_discard_out,
1da177e4
LT
233 },
234 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 235 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
236 .rt6i_metric = ~(u32) 0,
237 .rt6i_ref = ATOMIC_INIT(1),
238};
239
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/* Template for the "prohibit" reject entry; its error is -EACCES. */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" reject entry; its error is -EINVAL and
 * both directions are handled by dst_discard.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
276
1da177e4 277/* allocate dst with ip6_dst_ops */
97bab73f 278static inline struct rt6_info *ip6_dst_alloc(struct net *net,
957c665f 279 struct net_device *dev,
8b96d22d
DM
280 int flags,
281 struct fib6_table *table)
1da177e4 282{
97bab73f 283 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
f5b0a874 284 0, DST_OBSOLETE_NONE, flags);
cf911662 285
97bab73f 286 if (rt) {
8104891b
SK
287 struct dst_entry *dst = &rt->dst;
288
289 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
8b96d22d 290 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
97bab73f 291 }
cf911662 292 return rt;
1da177e4
LT
293}
294
295static void ip6_dst_destroy(struct dst_entry *dst)
296{
297 struct rt6_info *rt = (struct rt6_info *)dst;
298 struct inet6_dev *idev = rt->rt6i_idev;
299
97cac082
DM
300 if (rt->n)
301 neigh_release(rt->n);
302
8e2ec639
YZ
303 if (!(rt->dst.flags & DST_HOST))
304 dst_destroy_metrics_generic(dst);
305
38308473 306 if (idev) {
1da177e4
LT
307 rt->rt6i_idev = NULL;
308 in6_dev_put(idev);
1ab1457c 309 }
1716a961
G
310
311 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
312 dst_release(dst->from);
313
97bab73f
DM
314 if (rt6_has_peer(rt)) {
315 struct inet_peer *peer = rt6_peer_ptr(rt);
b3419363
DM
316 inet_putpeer(peer);
317 }
318}
319
6431cbc2
DM
320static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
321
322static u32 rt6_peer_genid(void)
323{
324 return atomic_read(&__rt6_peer_genid);
325}
326
b3419363
DM
327void rt6_bind_peer(struct rt6_info *rt, int create)
328{
97bab73f 329 struct inet_peer_base *base;
b3419363
DM
330 struct inet_peer *peer;
331
97bab73f
DM
332 base = inetpeer_base_ptr(rt->_rt6i_peer);
333 if (!base)
334 return;
335
336 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
7b34ca2a
DM
337 if (peer) {
338 if (!rt6_set_peer(rt, peer))
339 inet_putpeer(peer);
340 else
341 rt->rt6i_peer_genid = rt6_peer_genid();
342 }
1da177e4
LT
343}
344
345static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
346 int how)
347{
348 struct rt6_info *rt = (struct rt6_info *)dst;
349 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 350 struct net_device *loopback_dev =
c346dca1 351 dev_net(dev)->loopback_dev;
1da177e4 352
97cac082
DM
353 if (dev != loopback_dev) {
354 if (idev && idev->dev == dev) {
355 struct inet6_dev *loopback_idev =
356 in6_dev_get(loopback_dev);
357 if (loopback_idev) {
358 rt->rt6i_idev = loopback_idev;
359 in6_dev_put(idev);
360 }
361 }
362 if (rt->n && rt->n->dev == dev) {
363 rt->n->dev = loopback_dev;
364 dev_hold(loopback_dev);
365 dev_put(dev);
1da177e4
LT
366 }
367 }
368}
369
a50feda5 370static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 371{
1716a961
G
372 if (rt->rt6i_flags & RTF_EXPIRES) {
373 if (time_after(jiffies, rt->dst.expires))
a50feda5 374 return true;
1716a961 375 } else if (rt->dst.from) {
3fd91fb3 376 return rt6_check_expired((struct rt6_info *) rt->dst.from);
1716a961 377 }
a50feda5 378 return false;
1da177e4
LT
379}
380
a50feda5 381static bool rt6_need_strict(const struct in6_addr *daddr)
c71099ac 382{
a02cec21
ED
383 return ipv6_addr_type(daddr) &
384 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
385}
386
1da177e4 387/*
c71099ac 388 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
389 */
390
8ed67789
DL
391static inline struct rt6_info *rt6_device_match(struct net *net,
392 struct rt6_info *rt,
b71d1d42 393 const struct in6_addr *saddr,
1da177e4 394 int oif,
d420895e 395 int flags)
1da177e4
LT
396{
397 struct rt6_info *local = NULL;
398 struct rt6_info *sprt;
399
dd3abc4e
YH
400 if (!oif && ipv6_addr_any(saddr))
401 goto out;
402
d8d1f30b 403 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 404 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
405
406 if (oif) {
1da177e4
LT
407 if (dev->ifindex == oif)
408 return sprt;
409 if (dev->flags & IFF_LOOPBACK) {
38308473 410 if (!sprt->rt6i_idev ||
1da177e4 411 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 412 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 413 continue;
1ab1457c 414 if (local && (!oif ||
1da177e4
LT
415 local->rt6i_idev->dev->ifindex == oif))
416 continue;
417 }
418 local = sprt;
419 }
dd3abc4e
YH
420 } else {
421 if (ipv6_chk_addr(net, saddr, dev,
422 flags & RT6_LOOKUP_F_IFACE))
423 return sprt;
1da177e4 424 }
dd3abc4e 425 }
1da177e4 426
dd3abc4e 427 if (oif) {
1da177e4
LT
428 if (local)
429 return local;
430
d420895e 431 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 432 return net->ipv6.ip6_null_entry;
1da177e4 433 }
dd3abc4e 434out:
1da177e4
LT
435 return rt;
436}
437
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the neighbour bound to @rt is not in a
 * NUD_VALID state and the last update is older than the interface's
 * rtr_probe_interval, send a Neighbour Solicitation to the solicited-node
 * multicast address of the neighbour.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute.
 *
 * The rate-limit test and the update of neigh->updated are done under
 * the neighbour's write lock: the timestamp is modified here, and doing
 * so under the read lock would let concurrent callers all pass the test
 * and each send a probe.
 *
 * NOTE(review): rt->rt6i_idev is dereferenced without a NULL check, as
 * in the original; confirm callers guarantee it is set.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	neigh = rt ? rt->n : NULL;
	/* Cheap unlocked pre-check; re-validated under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies,
			neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
474
1da177e4 475/*
554cfb7e 476 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 477 */
b6f99a21 478static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 479{
d1918542 480 struct net_device *dev = rt->dst.dev;
161980f4 481 if (!oif || dev->ifindex == oif)
554cfb7e 482 return 2;
161980f4
DM
483 if ((dev->flags & IFF_LOOPBACK) &&
484 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
485 return 1;
486 return 0;
554cfb7e 487}
1da177e4 488
b6f99a21 489static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 490{
f2c31e32 491 struct neighbour *neigh;
398bcbeb 492 int m;
f2c31e32 493
97cac082 494 neigh = rt->n;
4d0c5911
YH
495 if (rt->rt6i_flags & RTF_NONEXTHOP ||
496 !(rt->rt6i_flags & RTF_GATEWAY))
497 m = 1;
498 else if (neigh) {
554cfb7e
YH
499 read_lock_bh(&neigh->lock);
500 if (neigh->nud_state & NUD_VALID)
4d0c5911 501 m = 2;
398bcbeb
YH
502#ifdef CONFIG_IPV6_ROUTER_PREF
503 else if (neigh->nud_state & NUD_FAILED)
504 m = 0;
505#endif
506 else
ea73ee23 507 m = 1;
554cfb7e 508 read_unlock_bh(&neigh->lock);
398bcbeb
YH
509 } else
510 m = 0;
554cfb7e 511 return m;
1da177e4
LT
512}
513
554cfb7e
YH
514static int rt6_score_route(struct rt6_info *rt, int oif,
515 int strict)
1da177e4 516{
4d0c5911 517 int m, n;
1ab1457c 518
4d0c5911 519 m = rt6_check_dev(rt, oif);
77d16f45 520 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 521 return -1;
ebacaaa0
YH
522#ifdef CONFIG_IPV6_ROUTER_PREF
523 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
524#endif
4d0c5911 525 n = rt6_check_neigh(rt);
557e92ef 526 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
527 return -1;
528 return m;
529}
530
f11e6659
DM
531static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
532 int *mpri, struct rt6_info *match)
554cfb7e 533{
f11e6659
DM
534 int m;
535
536 if (rt6_check_expired(rt))
537 goto out;
538
539 m = rt6_score_route(rt, oif, strict);
540 if (m < 0)
541 goto out;
542
543 if (m > *mpri) {
544 if (strict & RT6_LOOKUP_F_REACHABLE)
545 rt6_probe(match);
546 *mpri = m;
547 match = rt;
548 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
549 rt6_probe(rt);
550 }
551
552out:
553 return match;
554}
555
556static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
557 struct rt6_info *rr_head,
558 u32 metric, int oif, int strict)
559{
560 struct rt6_info *rt, *match;
554cfb7e 561 int mpri = -1;
1da177e4 562
f11e6659
DM
563 match = NULL;
564 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 565 rt = rt->dst.rt6_next)
f11e6659
DM
566 match = find_match(rt, oif, strict, &mpri, match);
567 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 568 rt = rt->dst.rt6_next)
f11e6659 569 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 570
f11e6659
DM
571 return match;
572}
1da177e4 573
f11e6659
DM
574static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
575{
576 struct rt6_info *match, *rt0;
8ed67789 577 struct net *net;
1da177e4 578
f11e6659
DM
579 rt0 = fn->rr_ptr;
580 if (!rt0)
581 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 582
f11e6659 583 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 584
554cfb7e 585 if (!match &&
f11e6659 586 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 587 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 588
554cfb7e 589 /* no entries matched; do round-robin */
f11e6659
DM
590 if (!next || next->rt6i_metric != rt0->rt6i_metric)
591 next = fn->leaf;
592
593 if (next != rt0)
594 fn->rr_ptr = next;
1da177e4 595 }
1da177e4 596
d1918542 597 net = dev_net(rt0->dst.dev);
a02cec21 598 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
599}
600
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received Route Information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Depending on the advertised lifetime the matching route is deleted,
 * created, or has its preference and expiry refreshed.
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2)
			return -EINVAL;
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1)
			return -EINVAL;
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length != 3) {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	} else {
		prefix = (struct in6_addr *)rinfo->prefix;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) |
				 RTF_PREF(pref);

	if (rt) {
		if (addrconf_finite_timeout(lifetime))
			rt6_set_expires(rt, jiffies + HZ * lifetime);
		else
			rt6_clean_expires(rt);

		dst_release(&rt->dst);
	}
	return 0;
}
#endif
673
/*
 * Backtrack towards the root of the FIB tree when the current lookup
 * ended on the null entry.
 *
 * The macro operates on the caller's local variables "rt" and "fn" and
 * jumps to the caller's labels "out" (tree root reached) and "restart"
 * (a node carrying route info was found).
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 691
8ed67789
DL
692static struct rt6_info *ip6_pol_route_lookup(struct net *net,
693 struct fib6_table *table,
4c9483b2 694 struct flowi6 *fl6, int flags)
1da177e4
LT
695{
696 struct fib6_node *fn;
697 struct rt6_info *rt;
698
c71099ac 699 read_lock_bh(&table->tb6_lock);
4c9483b2 700 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
701restart:
702 rt = fn->leaf;
4c9483b2
DM
703 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
704 BACKTRACK(net, &fl6->saddr);
c71099ac 705out:
d8d1f30b 706 dst_use(&rt->dst, jiffies);
c71099ac 707 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
708 return rt;
709
710}
711
ea6e574e
FW
712struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
713 int flags)
714{
715 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
716}
717EXPORT_SYMBOL_GPL(ip6_route_lookup);
718
9acd9f3a
YH
719struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
720 const struct in6_addr *saddr, int oif, int strict)
c71099ac 721{
4c9483b2
DM
722 struct flowi6 fl6 = {
723 .flowi6_oif = oif,
724 .daddr = *daddr,
c71099ac
TG
725 };
726 struct dst_entry *dst;
77d16f45 727 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 728
adaa70bb 729 if (saddr) {
4c9483b2 730 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
731 flags |= RT6_LOOKUP_F_HAS_SADDR;
732 }
733
4c9483b2 734 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
735 if (dst->error == 0)
736 return (struct rt6_info *) dst;
737
738 dst_release(dst);
739
1da177e4
LT
740 return NULL;
741}
742
7159039a
YH
743EXPORT_SYMBOL(rt6_lookup);
744
c71099ac 745/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
746 It takes new route entry, the addition fails by any reason the
747 route is freed. In any case, if caller does not hold it, it may
748 be destroyed.
749 */
750
86872cb5 751static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
752{
753 int err;
c71099ac 754 struct fib6_table *table;
1da177e4 755
c71099ac
TG
756 table = rt->rt6i_table;
757 write_lock_bh(&table->tb6_lock);
86872cb5 758 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 759 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
760
761 return err;
762}
763
40e22e8f
TG
764int ip6_ins_rt(struct rt6_info *rt)
765{
4d1169c1 766 struct nl_info info = {
d1918542 767 .nl_net = dev_net(rt->dst.dev),
4d1169c1 768 };
528c4ceb 769 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
770}
771
1716a961 772static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
21efcfa0 773 const struct in6_addr *daddr,
b71d1d42 774 const struct in6_addr *saddr)
1da177e4 775{
1da177e4
LT
776 struct rt6_info *rt;
777
778 /*
779 * Clone the route.
780 */
781
21efcfa0 782 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
783
784 if (rt) {
14deae41
DM
785 int attempts = !in_softirq();
786
38308473 787 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 788 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 789 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 790 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 791 rt->rt6i_gateway = *daddr;
58c4fb86 792 }
1da177e4 793
1da177e4 794 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
795
796#ifdef CONFIG_IPV6_SUBTREES
797 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 798 rt->rt6i_src.addr = *saddr;
1da177e4
LT
799 rt->rt6i_src.plen = 128;
800 }
801#endif
802
14deae41 803 retry:
8ade06c6 804 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
d1918542 805 struct net *net = dev_net(rt->dst.dev);
14deae41
DM
806 int saved_rt_min_interval =
807 net->ipv6.sysctl.ip6_rt_gc_min_interval;
808 int saved_rt_elasticity =
809 net->ipv6.sysctl.ip6_rt_gc_elasticity;
810
811 if (attempts-- > 0) {
812 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
813 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
814
86393e52 815 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
816
817 net->ipv6.sysctl.ip6_rt_gc_elasticity =
818 saved_rt_elasticity;
819 net->ipv6.sysctl.ip6_rt_gc_min_interval =
820 saved_rt_min_interval;
821 goto retry;
822 }
823
f3213831 824 net_warn_ratelimited("Neighbour table overflow\n");
d8d1f30b 825 dst_free(&rt->dst);
14deae41
DM
826 return NULL;
827 }
95a9a5ba 828 }
1da177e4 829
95a9a5ba
YH
830 return rt;
831}
1da177e4 832
21efcfa0
ED
833static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
834 const struct in6_addr *daddr)
299d9939 835{
21efcfa0
ED
836 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
837
299d9939 838 if (rt) {
299d9939 839 rt->rt6i_flags |= RTF_CACHE;
97cac082 840 rt->n = neigh_clone(ort->n);
299d9939
YH
841 }
842 return rt;
843}
844
8ed67789 845static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 846 struct flowi6 *fl6, int flags)
1da177e4
LT
847{
848 struct fib6_node *fn;
519fbd87 849 struct rt6_info *rt, *nrt;
c71099ac 850 int strict = 0;
1da177e4 851 int attempts = 3;
519fbd87 852 int err;
53b7997f 853 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 854
77d16f45 855 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
856
857relookup:
c71099ac 858 read_lock_bh(&table->tb6_lock);
1da177e4 859
8238dd06 860restart_2:
4c9483b2 861 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
862
863restart:
4acad72d 864 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 865
4c9483b2 866 BACKTRACK(net, &fl6->saddr);
8ed67789 867 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 868 rt->rt6i_flags & RTF_CACHE)
1ddef044 869 goto out;
1da177e4 870
d8d1f30b 871 dst_hold(&rt->dst);
c71099ac 872 read_unlock_bh(&table->tb6_lock);
fb9de91e 873
97cac082 874 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 875 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 876 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 877 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
878 else
879 goto out2;
e40cf353 880
d8d1f30b 881 dst_release(&rt->dst);
8ed67789 882 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 883
d8d1f30b 884 dst_hold(&rt->dst);
519fbd87 885 if (nrt) {
40e22e8f 886 err = ip6_ins_rt(nrt);
519fbd87 887 if (!err)
1da177e4 888 goto out2;
1da177e4 889 }
1da177e4 890
519fbd87
YH
891 if (--attempts <= 0)
892 goto out2;
893
894 /*
c71099ac 895 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
896 * released someone could insert this route. Relookup.
897 */
d8d1f30b 898 dst_release(&rt->dst);
519fbd87
YH
899 goto relookup;
900
901out:
8238dd06
YH
902 if (reachable) {
903 reachable = 0;
904 goto restart_2;
905 }
d8d1f30b 906 dst_hold(&rt->dst);
c71099ac 907 read_unlock_bh(&table->tb6_lock);
1da177e4 908out2:
d8d1f30b
CG
909 rt->dst.lastuse = jiffies;
910 rt->dst.__use++;
c71099ac
TG
911
912 return rt;
1da177e4
LT
913}
914
8ed67789 915static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 916 struct flowi6 *fl6, int flags)
4acad72d 917{
4c9483b2 918 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
919}
920
72331bc0
SL
921static struct dst_entry *ip6_route_input_lookup(struct net *net,
922 struct net_device *dev,
923 struct flowi6 *fl6, int flags)
924{
925 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
926 flags |= RT6_LOOKUP_F_IFACE;
927
928 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
929}
930
c71099ac
TG
931void ip6_route_input(struct sk_buff *skb)
932{
b71d1d42 933 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 934 struct net *net = dev_net(skb->dev);
adaa70bb 935 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
936 struct flowi6 fl6 = {
937 .flowi6_iif = skb->dev->ifindex,
938 .daddr = iph->daddr,
939 .saddr = iph->saddr,
38308473 940 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
941 .flowi6_mark = skb->mark,
942 .flowi6_proto = iph->nexthdr,
c71099ac 943 };
adaa70bb 944
72331bc0 945 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
946}
947
8ed67789 948static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 949 struct flowi6 *fl6, int flags)
1da177e4 950{
4c9483b2 951 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
952}
953
9c7a4f9c 954struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 955 struct flowi6 *fl6)
c71099ac
TG
956{
957 int flags = 0;
958
1fb9489b 959 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 960
4c9483b2 961 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 962 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 963
4c9483b2 964 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 965 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
966 else if (sk)
967 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 968
4c9483b2 969 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
970}
971
7159039a 972EXPORT_SYMBOL(ip6_route_output);
1da177e4 973
2774c131 974struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 975{
5c1e6aa3 976 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
977 struct dst_entry *new = NULL;
978
f5b0a874 979 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
14e50e57 980 if (rt) {
d8d1f30b 981 new = &rt->dst;
14e50e57 982
8104891b
SK
983 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
984 rt6_init_peer(rt, net->ipv6.peers);
985
14e50e57 986 new->__use = 1;
352e512c
HX
987 new->input = dst_discard;
988 new->output = dst_discard;
14e50e57 989
21efcfa0
ED
990 if (dst_metrics_read_only(&ort->dst))
991 new->_metrics = ort->dst._metrics;
992 else
993 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
994 rt->rt6i_idev = ort->rt6i_idev;
995 if (rt->rt6i_idev)
996 in6_dev_hold(rt->rt6i_idev);
14e50e57 997
4e3fd7a0 998 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
999 rt->rt6i_flags = ort->rt6i_flags;
1000 rt6_clean_expires(rt);
14e50e57
DM
1001 rt->rt6i_metric = 0;
1002
1003 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1004#ifdef CONFIG_IPV6_SUBTREES
1005 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1006#endif
1007
1008 dst_free(new);
1009 }
1010
69ead7af
DM
1011 dst_release(dst_orig);
1012 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1013}
14e50e57 1014
1da177e4
LT
1015/*
1016 * Destination cache support functions
1017 */
1018
1019static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1020{
1021 struct rt6_info *rt;
1022
1023 rt = (struct rt6_info *) dst;
1024
6431cbc2
DM
1025 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1026 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
97bab73f 1027 if (!rt6_has_peer(rt))
6431cbc2
DM
1028 rt6_bind_peer(rt, 0);
1029 rt->rt6i_peer_genid = rt6_peer_genid();
1030 }
1da177e4 1031 return dst;
6431cbc2 1032 }
1da177e4
LT
1033 return NULL;
1034}
1035
1036static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1037{
1038 struct rt6_info *rt = (struct rt6_info *) dst;
1039
1040 if (rt) {
54c1a859
YH
1041 if (rt->rt6i_flags & RTF_CACHE) {
1042 if (rt6_check_expired(rt)) {
1043 ip6_del_rt(rt);
1044 dst = NULL;
1045 }
1046 } else {
1da177e4 1047 dst_release(dst);
54c1a859
YH
1048 dst = NULL;
1049 }
1da177e4 1050 }
54c1a859 1051 return dst;
1da177e4
LT
1052}
1053
1054static void ip6_link_failure(struct sk_buff *skb)
1055{
1056 struct rt6_info *rt;
1057
3ffe533c 1058 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1059
adf30907 1060 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1061 if (rt) {
1716a961
G
1062 if (rt->rt6i_flags & RTF_CACHE)
1063 rt6_update_expires(rt, 0);
1064 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1da177e4
LT
1065 rt->rt6i_node->fn_sernum = -1;
1066 }
1067}
1068
6700c270
DM
1069static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1070 struct sk_buff *skb, u32 mtu)
1da177e4
LT
1071{
1072 struct rt6_info *rt6 = (struct rt6_info*)dst;
1073
81aded24 1074 dst_confirm(dst);
1da177e4 1075 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
81aded24
DM
1076 struct net *net = dev_net(dst->dev);
1077
1da177e4
LT
1078 rt6->rt6i_flags |= RTF_MODIFIED;
1079 if (mtu < IPV6_MIN_MTU) {
defb3519 1080 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1081 mtu = IPV6_MIN_MTU;
defb3519
DM
1082 features |= RTAX_FEATURE_ALLFRAG;
1083 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1084 }
defb3519 1085 dst_metric_set(dst, RTAX_MTU, mtu);
81aded24 1086 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1087 }
1088}
1089
42ae66c8
DM
1090void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1091 int oif, u32 mark)
81aded24
DM
1092{
1093 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1094 struct dst_entry *dst;
1095 struct flowi6 fl6;
1096
1097 memset(&fl6, 0, sizeof(fl6));
1098 fl6.flowi6_oif = oif;
1099 fl6.flowi6_mark = mark;
3e12939a 1100 fl6.flowi6_flags = 0;
81aded24
DM
1101 fl6.daddr = iph->daddr;
1102 fl6.saddr = iph->saddr;
1103 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1104
1105 dst = ip6_route_output(net, NULL, &fl6);
1106 if (!dst->error)
6700c270 1107 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
81aded24
DM
1108 dst_release(dst);
1109}
1110EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1111
1112void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1113{
1114 ip6_update_pmtu(skb, sock_net(sk), mtu,
1115 sk->sk_bound_dev_if, sk->sk_mark);
1116}
1117EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1118
3a5ad2ee
DM
1119void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1120{
1121 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1122 struct dst_entry *dst;
1123 struct flowi6 fl6;
1124
1125 memset(&fl6, 0, sizeof(fl6));
1126 fl6.flowi6_oif = oif;
1127 fl6.flowi6_mark = mark;
1128 fl6.flowi6_flags = 0;
1129 fl6.daddr = iph->daddr;
1130 fl6.saddr = iph->saddr;
1131 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1132
1133 dst = ip6_route_output(net, NULL, &fl6);
1134 if (!dst->error)
6700c270 1135 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1136 dst_release(dst);
1137}
1138EXPORT_SYMBOL_GPL(ip6_redirect);
1139
1140void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1141{
1142 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1143}
1144EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1145
0dbaee3b 1146static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1147{
0dbaee3b
DM
1148 struct net_device *dev = dst->dev;
1149 unsigned int mtu = dst_mtu(dst);
1150 struct net *net = dev_net(dev);
1151
1da177e4
LT
1152 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1153
5578689a
DL
1154 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1155 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1156
1157 /*
1ab1457c
YH
1158 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1159 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1160 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1161 * rely only on pmtu discovery"
1162 */
1163 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1164 mtu = IPV6_MAXPLEN;
1165 return mtu;
1166}
1167
ebb762f2 1168static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1169{
d33e4553 1170 struct inet6_dev *idev;
618f9bc7
SK
1171 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1172
1173 if (mtu)
1174 return mtu;
1175
1176 mtu = IPV6_MIN_MTU;
d33e4553
DM
1177
1178 rcu_read_lock();
1179 idev = __in6_dev_get(dst->dev);
1180 if (idev)
1181 mtu = idev->cnf.mtu6;
1182 rcu_read_unlock();
1183
1184 return mtu;
1185}
1186
3b00944c
YH
1187static struct dst_entry *icmp6_dst_gc_list;
1188static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1189
3b00944c 1190struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1191 struct neighbour *neigh,
87a11578 1192 struct flowi6 *fl6)
1da177e4 1193{
87a11578 1194 struct dst_entry *dst;
1da177e4
LT
1195 struct rt6_info *rt;
1196 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1197 struct net *net = dev_net(dev);
1da177e4 1198
38308473 1199 if (unlikely(!idev))
122bdf67 1200 return ERR_PTR(-ENODEV);
1da177e4 1201
8b96d22d 1202 rt = ip6_dst_alloc(net, dev, 0, NULL);
38308473 1203 if (unlikely(!rt)) {
1da177e4 1204 in6_dev_put(idev);
87a11578 1205 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1206 goto out;
1207 }
1208
1da177e4
LT
1209 if (neigh)
1210 neigh_hold(neigh);
14deae41 1211 else {
f894cbf8 1212 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
b43faac6 1213 if (IS_ERR(neigh)) {
252c3d84 1214 in6_dev_put(idev);
b43faac6
DM
1215 dst_free(&rt->dst);
1216 return ERR_CAST(neigh);
1217 }
14deae41 1218 }
1da177e4 1219
8e2ec639
YZ
1220 rt->dst.flags |= DST_HOST;
1221 rt->dst.output = ip6_output;
97cac082 1222 rt->n = neigh;
d8d1f30b 1223 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1224 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1225 rt->rt6i_dst.plen = 128;
1226 rt->rt6i_idev = idev;
7011687f 1227 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1228
3b00944c 1229 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1230 rt->dst.next = icmp6_dst_gc_list;
1231 icmp6_dst_gc_list = &rt->dst;
3b00944c 1232 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1233
5578689a 1234 fib6_force_start_gc(net);
1da177e4 1235
87a11578
DM
1236 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1237
1da177e4 1238out:
87a11578 1239 return dst;
1da177e4
LT
1240}
1241
3d0f24a7 1242int icmp6_dst_gc(void)
1da177e4 1243{
e9476e95 1244 struct dst_entry *dst, **pprev;
3d0f24a7 1245 int more = 0;
1da177e4 1246
3b00944c
YH
1247 spin_lock_bh(&icmp6_dst_lock);
1248 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1249
1da177e4
LT
1250 while ((dst = *pprev) != NULL) {
1251 if (!atomic_read(&dst->__refcnt)) {
1252 *pprev = dst->next;
1253 dst_free(dst);
1da177e4
LT
1254 } else {
1255 pprev = &dst->next;
3d0f24a7 1256 ++more;
1da177e4
LT
1257 }
1258 }
1259
3b00944c 1260 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1261
3d0f24a7 1262 return more;
1da177e4
LT
1263}
1264
1e493d19
DM
1265static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1266 void *arg)
1267{
1268 struct dst_entry *dst, **pprev;
1269
1270 spin_lock_bh(&icmp6_dst_lock);
1271 pprev = &icmp6_dst_gc_list;
1272 while ((dst = *pprev) != NULL) {
1273 struct rt6_info *rt = (struct rt6_info *) dst;
1274 if (func(rt, arg)) {
1275 *pprev = dst->next;
1276 dst_free(dst);
1277 } else {
1278 pprev = &dst->next;
1279 }
1280 }
1281 spin_unlock_bh(&icmp6_dst_lock);
1282}
1283
569d3645 1284static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1285{
1da177e4 1286 unsigned long now = jiffies;
86393e52 1287 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1288 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1289 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1290 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1291 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1292 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1293 int entries;
7019b78e 1294
fc66f95c 1295 entries = dst_entries_get_fast(ops);
7019b78e 1296 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1297 entries <= rt_max_size)
1da177e4
LT
1298 goto out;
1299
6891a346
BT
1300 net->ipv6.ip6_rt_gc_expire++;
1301 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1302 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1303 entries = dst_entries_get_slow(ops);
1304 if (entries < ops->gc_thresh)
7019b78e 1305 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1306out:
7019b78e 1307 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1308 return entries > rt_max_size;
1da177e4
LT
1309}
1310
1311/* Clean host part of a prefix. Not necessary in radix tree,
1312 but results in cleaner routing tables.
1313
1314 Remove it only when all the things will work!
1315 */
1316
6b75d090 1317int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1318{
5170ae82 1319 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1320 if (hoplimit == 0) {
6b75d090 1321 struct net_device *dev = dst->dev;
c68f24cc
ED
1322 struct inet6_dev *idev;
1323
1324 rcu_read_lock();
1325 idev = __in6_dev_get(dev);
1326 if (idev)
6b75d090 1327 hoplimit = idev->cnf.hop_limit;
c68f24cc 1328 else
53b7997f 1329 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1330 rcu_read_unlock();
1da177e4
LT
1331 }
1332 return hoplimit;
1333}
abbf46ae 1334EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1335
1336/*
1337 *
1338 */
1339
86872cb5 1340int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1341{
1342 int err;
5578689a 1343 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1344 struct rt6_info *rt = NULL;
1345 struct net_device *dev = NULL;
1346 struct inet6_dev *idev = NULL;
c71099ac 1347 struct fib6_table *table;
1da177e4
LT
1348 int addr_type;
1349
86872cb5 1350 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1351 return -EINVAL;
1352#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1353 if (cfg->fc_src_len)
1da177e4
LT
1354 return -EINVAL;
1355#endif
86872cb5 1356 if (cfg->fc_ifindex) {
1da177e4 1357 err = -ENODEV;
5578689a 1358 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1359 if (!dev)
1360 goto out;
1361 idev = in6_dev_get(dev);
1362 if (!idev)
1363 goto out;
1364 }
1365
86872cb5
TG
1366 if (cfg->fc_metric == 0)
1367 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1368
d71314b4 1369 err = -ENOBUFS;
38308473
DM
1370 if (cfg->fc_nlinfo.nlh &&
1371 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1372 table = fib6_get_table(net, cfg->fc_table);
38308473 1373 if (!table) {
f3213831 1374 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1375 table = fib6_new_table(net, cfg->fc_table);
1376 }
1377 } else {
1378 table = fib6_new_table(net, cfg->fc_table);
1379 }
38308473
DM
1380
1381 if (!table)
c71099ac 1382 goto out;
c71099ac 1383
8b96d22d 1384 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1da177e4 1385
38308473 1386 if (!rt) {
1da177e4
LT
1387 err = -ENOMEM;
1388 goto out;
1389 }
1390
d8d1f30b 1391 rt->dst.obsolete = -1;
1716a961
G
1392
1393 if (cfg->fc_flags & RTF_EXPIRES)
1394 rt6_set_expires(rt, jiffies +
1395 clock_t_to_jiffies(cfg->fc_expires));
1396 else
1397 rt6_clean_expires(rt);
1da177e4 1398
86872cb5
TG
1399 if (cfg->fc_protocol == RTPROT_UNSPEC)
1400 cfg->fc_protocol = RTPROT_BOOT;
1401 rt->rt6i_protocol = cfg->fc_protocol;
1402
1403 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1404
1405 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1406 rt->dst.input = ip6_mc_input;
ab79ad14
1407 else if (cfg->fc_flags & RTF_LOCAL)
1408 rt->dst.input = ip6_input;
1da177e4 1409 else
d8d1f30b 1410 rt->dst.input = ip6_forward;
1da177e4 1411
d8d1f30b 1412 rt->dst.output = ip6_output;
1da177e4 1413
86872cb5
TG
1414 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1415 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1416 if (rt->rt6i_dst.plen == 128)
11d53b49 1417 rt->dst.flags |= DST_HOST;
1da177e4 1418
8e2ec639
YZ
1419 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1420 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1421 if (!metrics) {
1422 err = -ENOMEM;
1423 goto out;
1424 }
1425 dst_init_metrics(&rt->dst, metrics, 0);
1426 }
1da177e4 1427#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1428 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1429 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1430#endif
1431
86872cb5 1432 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1433
1434 /* We cannot add true routes via loopback here,
1435 they would result in kernel looping; promote them to reject routes
1436 */
86872cb5 1437 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1438 (dev && (dev->flags & IFF_LOOPBACK) &&
1439 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1440 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1441 /* hold loopback dev/idev if we haven't done so. */
5578689a 1442 if (dev != net->loopback_dev) {
1da177e4
LT
1443 if (dev) {
1444 dev_put(dev);
1445 in6_dev_put(idev);
1446 }
5578689a 1447 dev = net->loopback_dev;
1da177e4
LT
1448 dev_hold(dev);
1449 idev = in6_dev_get(dev);
1450 if (!idev) {
1451 err = -ENODEV;
1452 goto out;
1453 }
1454 }
d8d1f30b
CG
1455 rt->dst.output = ip6_pkt_discard_out;
1456 rt->dst.input = ip6_pkt_discard;
1da177e4 1457 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
1458 switch (cfg->fc_type) {
1459 case RTN_BLACKHOLE:
1460 rt->dst.error = -EINVAL;
1461 break;
1462 case RTN_PROHIBIT:
1463 rt->dst.error = -EACCES;
1464 break;
b4949ab2
ND
1465 case RTN_THROW:
1466 rt->dst.error = -EAGAIN;
1467 break;
ef2c7d7b
ND
1468 default:
1469 rt->dst.error = -ENETUNREACH;
1470 break;
1471 }
1da177e4
LT
1472 goto install_route;
1473 }
1474
86872cb5 1475 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1476 const struct in6_addr *gw_addr;
1da177e4
LT
1477 int gwa_type;
1478
86872cb5 1479 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1480 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1481 gwa_type = ipv6_addr_type(gw_addr);
1482
1483 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1484 struct rt6_info *grt;
1485
1486 /* IPv6 strictly inhibits using not link-local
1487 addresses as nexthop address.
1488 Otherwise, router will not able to send redirects.
1489 It is very good, but in some (rare!) circumstances
1490 (SIT, PtP, NBMA NOARP links) it is handy to allow
1491 some exceptions. --ANK
1492 */
1493 err = -EINVAL;
38308473 1494 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1495 goto out;
1496
5578689a 1497 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1498
1499 err = -EHOSTUNREACH;
38308473 1500 if (!grt)
1da177e4
LT
1501 goto out;
1502 if (dev) {
d1918542 1503 if (dev != grt->dst.dev) {
d8d1f30b 1504 dst_release(&grt->dst);
1da177e4
LT
1505 goto out;
1506 }
1507 } else {
d1918542 1508 dev = grt->dst.dev;
1da177e4
LT
1509 idev = grt->rt6i_idev;
1510 dev_hold(dev);
1511 in6_dev_hold(grt->rt6i_idev);
1512 }
38308473 1513 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1514 err = 0;
d8d1f30b 1515 dst_release(&grt->dst);
1da177e4
LT
1516
1517 if (err)
1518 goto out;
1519 }
1520 err = -EINVAL;
38308473 1521 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1522 goto out;
1523 }
1524
1525 err = -ENODEV;
38308473 1526 if (!dev)
1da177e4
LT
1527 goto out;
1528
c3968a85
DW
1529 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1530 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1531 err = -EINVAL;
1532 goto out;
1533 }
4e3fd7a0 1534 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1535 rt->rt6i_prefsrc.plen = 128;
1536 } else
1537 rt->rt6i_prefsrc.plen = 0;
1538
86872cb5 1539 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
8ade06c6 1540 err = rt6_bind_neighbour(rt, dev);
f83c7790 1541 if (err)
1da177e4 1542 goto out;
1da177e4
LT
1543 }
1544
86872cb5 1545 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1546
1547install_route:
86872cb5
TG
1548 if (cfg->fc_mx) {
1549 struct nlattr *nla;
1550 int remaining;
1551
1552 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1553 int type = nla_type(nla);
86872cb5
TG
1554
1555 if (type) {
1556 if (type > RTAX_MAX) {
1da177e4
LT
1557 err = -EINVAL;
1558 goto out;
1559 }
86872cb5 1560
defb3519 1561 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1562 }
1da177e4
LT
1563 }
1564 }
1565
d8d1f30b 1566 rt->dst.dev = dev;
1da177e4 1567 rt->rt6i_idev = idev;
c71099ac 1568 rt->rt6i_table = table;
63152fc0 1569
c346dca1 1570 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1571
86872cb5 1572 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1573
1574out:
1575 if (dev)
1576 dev_put(dev);
1577 if (idev)
1578 in6_dev_put(idev);
1579 if (rt)
d8d1f30b 1580 dst_free(&rt->dst);
1da177e4
LT
1581 return err;
1582}
1583
86872cb5 1584static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1585{
1586 int err;
c71099ac 1587 struct fib6_table *table;
d1918542 1588 struct net *net = dev_net(rt->dst.dev);
1da177e4 1589
8ed67789 1590 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1591 return -ENOENT;
1592
c71099ac
TG
1593 table = rt->rt6i_table;
1594 write_lock_bh(&table->tb6_lock);
1da177e4 1595
86872cb5 1596 err = fib6_del(rt, info);
d8d1f30b 1597 dst_release(&rt->dst);
1da177e4 1598
c71099ac 1599 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1600
1601 return err;
1602}
1603
e0a1ad73
TG
1604int ip6_del_rt(struct rt6_info *rt)
1605{
4d1169c1 1606 struct nl_info info = {
d1918542 1607 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1608 };
528c4ceb 1609 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1610}
1611
86872cb5 1612static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1613{
c71099ac 1614 struct fib6_table *table;
1da177e4
LT
1615 struct fib6_node *fn;
1616 struct rt6_info *rt;
1617 int err = -ESRCH;
1618
5578689a 1619 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1620 if (!table)
c71099ac
TG
1621 return err;
1622
1623 read_lock_bh(&table->tb6_lock);
1da177e4 1624
c71099ac 1625 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1626 &cfg->fc_dst, cfg->fc_dst_len,
1627 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1628
1da177e4 1629 if (fn) {
d8d1f30b 1630 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1631 if (cfg->fc_ifindex &&
d1918542
DM
1632 (!rt->dst.dev ||
1633 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1634 continue;
86872cb5
TG
1635 if (cfg->fc_flags & RTF_GATEWAY &&
1636 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1637 continue;
86872cb5 1638 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1639 continue;
d8d1f30b 1640 dst_hold(&rt->dst);
c71099ac 1641 read_unlock_bh(&table->tb6_lock);
1da177e4 1642
86872cb5 1643 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1644 }
1645 }
c71099ac 1646 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1647
1648 return err;
1649}
1650
6700c270 1651static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 1652{
e8599ff4 1653 struct net *net = dev_net(skb->dev);
a6279458 1654 struct netevent_redirect netevent;
e8599ff4
DM
1655 struct rt6_info *rt, *nrt = NULL;
1656 const struct in6_addr *target;
e8599ff4 1657 struct ndisc_options ndopts;
6e157b6a
DM
1658 const struct in6_addr *dest;
1659 struct neighbour *old_neigh;
e8599ff4
DM
1660 struct inet6_dev *in6_dev;
1661 struct neighbour *neigh;
1662 struct icmp6hdr *icmph;
6e157b6a
DM
1663 int optlen, on_link;
1664 u8 *lladdr;
e8599ff4
DM
1665
1666 optlen = skb->tail - skb->transport_header;
1667 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1668
1669 if (optlen < 0) {
6e157b6a 1670 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
1671 return;
1672 }
1673
1674 icmph = icmp6_hdr(skb);
1675 target = (const struct in6_addr *) (icmph + 1);
1676 dest = target + 1;
1677
1678 if (ipv6_addr_is_multicast(dest)) {
6e157b6a 1679 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
1680 return;
1681 }
1682
6e157b6a 1683 on_link = 0;
e8599ff4
DM
1684 if (ipv6_addr_equal(dest, target)) {
1685 on_link = 1;
1686 } else if (ipv6_addr_type(target) !=
1687 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 1688 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
1689 return;
1690 }
1691
1692 in6_dev = __in6_dev_get(skb->dev);
1693 if (!in6_dev)
1694 return;
1695 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1696 return;
1697
1698 /* RFC2461 8.1:
1699 * The IP source address of the Redirect MUST be the same as the current
1700 * first-hop router for the specified ICMP Destination Address.
1701 */
1702
1703 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1704 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1705 return;
1706 }
6e157b6a
DM
1707
1708 lladdr = NULL;
e8599ff4
DM
1709 if (ndopts.nd_opts_tgt_lladdr) {
1710 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1711 skb->dev);
1712 if (!lladdr) {
1713 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1714 return;
1715 }
1716 }
1717
6e157b6a
DM
1718 rt = (struct rt6_info *) dst;
1719 if (rt == net->ipv6.ip6_null_entry) {
1720 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 1721 return;
6e157b6a 1722 }
e8599ff4 1723
6e157b6a
DM
1724 /* Redirect received -> path was valid.
1725 * Look, redirects are sent only in response to data packets,
1726 * so that this nexthop apparently is reachable. --ANK
1727 */
1728 dst_confirm(&rt->dst);
a6279458 1729
6e157b6a
DM
1730 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1731 if (!neigh)
1732 return;
a6279458 1733
6e157b6a
DM
1734 /* Duplicate redirect: silently ignore. */
1735 old_neigh = rt->n;
1736 if (neigh == old_neigh)
a6279458 1737 goto out;
1da177e4 1738
1da177e4
LT
1739 /*
1740 * We have finally decided to accept it.
1741 */
1742
1ab1457c 1743 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1744 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1745 NEIGH_UPDATE_F_OVERRIDE|
1746 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1747 NEIGH_UPDATE_F_ISROUTER))
1748 );
1749
21efcfa0 1750 nrt = ip6_rt_copy(rt, dest);
38308473 1751 if (!nrt)
1da177e4
LT
1752 goto out;
1753
1754 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1755 if (on_link)
1756 nrt->rt6i_flags &= ~RTF_GATEWAY;
1757
4e3fd7a0 1758 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
97cac082 1759 nrt->n = neigh_clone(neigh);
1da177e4 1760
40e22e8f 1761 if (ip6_ins_rt(nrt))
1da177e4
LT
1762 goto out;
1763
d8d1f30b 1764 netevent.old = &rt->dst;
1d248b1c 1765 netevent.old_neigh = old_neigh;
d8d1f30b 1766 netevent.new = &nrt->dst;
1d248b1c
DM
1767 netevent.new_neigh = neigh;
1768 netevent.daddr = dest;
8d71740c
TT
1769 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1770
38308473 1771 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 1772 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 1773 ip6_del_rt(rt);
1da177e4
LT
1774 }
1775
1776out:
e8599ff4 1777 neigh_release(neigh);
6e157b6a
DM
1778}
1779
1da177e4
LT
1780/*
1781 * Misc support functions
1782 */
1783
1716a961 1784static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 1785 const struct in6_addr *dest)
1da177e4 1786{
d1918542 1787 struct net *net = dev_net(ort->dst.dev);
8b96d22d
DM
1788 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1789 ort->rt6i_table);
1da177e4
LT
1790
1791 if (rt) {
d8d1f30b
CG
1792 rt->dst.input = ort->dst.input;
1793 rt->dst.output = ort->dst.output;
8e2ec639 1794 rt->dst.flags |= DST_HOST;
d8d1f30b 1795
4e3fd7a0 1796 rt->rt6i_dst.addr = *dest;
8e2ec639 1797 rt->rt6i_dst.plen = 128;
defb3519 1798 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1799 rt->dst.error = ort->dst.error;
1da177e4
LT
1800 rt->rt6i_idev = ort->rt6i_idev;
1801 if (rt->rt6i_idev)
1802 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1803 rt->dst.lastuse = jiffies;
1da177e4 1804
4e3fd7a0 1805 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1806 rt->rt6i_flags = ort->rt6i_flags;
1807 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1808 (RTF_DEFAULT | RTF_ADDRCONF))
1809 rt6_set_from(rt, ort);
1810 else
1811 rt6_clean_expires(rt);
1da177e4
LT
1812 rt->rt6i_metric = 0;
1813
1da177e4
LT
1814#ifdef CONFIG_IPV6_SUBTREES
1815 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1816#endif
0f6c6392 1817 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1818 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1819 }
1820 return rt;
1821}
1822
70ceb4f5 1823#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1824static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1825 const struct in6_addr *prefix, int prefixlen,
1826 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1827{
1828 struct fib6_node *fn;
1829 struct rt6_info *rt = NULL;
c71099ac
TG
1830 struct fib6_table *table;
1831
efa2cea0 1832 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1833 if (!table)
c71099ac 1834 return NULL;
70ceb4f5 1835
5744dd9b 1836 read_lock_bh(&table->tb6_lock);
c71099ac 1837 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1838 if (!fn)
1839 goto out;
1840
d8d1f30b 1841 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1842 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1843 continue;
1844 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1845 continue;
1846 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1847 continue;
d8d1f30b 1848 dst_hold(&rt->dst);
70ceb4f5
YH
1849 break;
1850 }
1851out:
5744dd9b 1852 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1853 return rt;
1854}
1855
efa2cea0 1856static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1857 const struct in6_addr *prefix, int prefixlen,
1858 const struct in6_addr *gwaddr, int ifindex,
95c96174 1859 unsigned int pref)
70ceb4f5 1860{
86872cb5
TG
1861 struct fib6_config cfg = {
1862 .fc_table = RT6_TABLE_INFO,
238fc7ea 1863 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1864 .fc_ifindex = ifindex,
1865 .fc_dst_len = prefixlen,
1866 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1867 RTF_UP | RTF_PREF(pref),
15e47304 1868 .fc_nlinfo.portid = 0,
efa2cea0
DL
1869 .fc_nlinfo.nlh = NULL,
1870 .fc_nlinfo.nl_net = net,
86872cb5
TG
1871 };
1872
4e3fd7a0
AD
1873 cfg.fc_dst = *prefix;
1874 cfg.fc_gateway = *gwaddr;
70ceb4f5 1875
e317da96
YH
1876 /* We should treat it as a default route if prefix length is 0. */
1877 if (!prefixlen)
86872cb5 1878 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1879
86872cb5 1880 ip6_route_add(&cfg);
70ceb4f5 1881
efa2cea0 1882 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1883}
1884#endif
1885
b71d1d42 1886struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1887{
1da177e4 1888 struct rt6_info *rt;
c71099ac 1889 struct fib6_table *table;
1da177e4 1890
c346dca1 1891 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1892 if (!table)
c71099ac 1893 return NULL;
1da177e4 1894
5744dd9b 1895 read_lock_bh(&table->tb6_lock);
d8d1f30b 1896 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
d1918542 1897 if (dev == rt->dst.dev &&
045927ff 1898 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1899 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1900 break;
1901 }
1902 if (rt)
d8d1f30b 1903 dst_hold(&rt->dst);
5744dd9b 1904 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1905 return rt;
1906}
1907
b71d1d42 1908struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1909 struct net_device *dev,
1910 unsigned int pref)
1da177e4 1911{
86872cb5
TG
1912 struct fib6_config cfg = {
1913 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1914 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1915 .fc_ifindex = dev->ifindex,
1916 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1917 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
15e47304 1918 .fc_nlinfo.portid = 0,
5578689a 1919 .fc_nlinfo.nlh = NULL,
c346dca1 1920 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1921 };
1da177e4 1922
4e3fd7a0 1923 cfg.fc_gateway = *gwaddr;
1da177e4 1924
86872cb5 1925 ip6_route_add(&cfg);
1da177e4 1926
1da177e4
LT
1927 return rt6_get_dflt_router(gwaddr, dev);
1928}
1929
7b4da532 1930void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1931{
1932 struct rt6_info *rt;
c71099ac
TG
1933 struct fib6_table *table;
1934
1935 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1936 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1937 if (!table)
c71099ac 1938 return;
1da177e4
LT
1939
1940restart:
c71099ac 1941 read_lock_bh(&table->tb6_lock);
d8d1f30b 1942 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1943 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1944 dst_hold(&rt->dst);
c71099ac 1945 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1946 ip6_del_rt(rt);
1da177e4
LT
1947 goto restart;
1948 }
1949 }
c71099ac 1950 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1951}
1952
5578689a
DL
1953static void rtmsg_to_fib6_config(struct net *net,
1954 struct in6_rtmsg *rtmsg,
86872cb5
TG
1955 struct fib6_config *cfg)
1956{
1957 memset(cfg, 0, sizeof(*cfg));
1958
1959 cfg->fc_table = RT6_TABLE_MAIN;
1960 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1961 cfg->fc_metric = rtmsg->rtmsg_metric;
1962 cfg->fc_expires = rtmsg->rtmsg_info;
1963 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1964 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1965 cfg->fc_flags = rtmsg->rtmsg_flags;
1966
5578689a 1967 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1968
4e3fd7a0
AD
1969 cfg->fc_dst = rtmsg->rtmsg_dst;
1970 cfg->fc_src = rtmsg->rtmsg_src;
1971 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
1972}
1973
5578689a 1974int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1975{
86872cb5 1976 struct fib6_config cfg;
1da177e4
LT
1977 struct in6_rtmsg rtmsg;
1978 int err;
1979
1980 switch(cmd) {
1981 case SIOCADDRT: /* Add a route */
1982 case SIOCDELRT: /* Delete a route */
1983 if (!capable(CAP_NET_ADMIN))
1984 return -EPERM;
1985 err = copy_from_user(&rtmsg, arg,
1986 sizeof(struct in6_rtmsg));
1987 if (err)
1988 return -EFAULT;
86872cb5 1989
5578689a 1990 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1991
1da177e4
LT
1992 rtnl_lock();
1993 switch (cmd) {
1994 case SIOCADDRT:
86872cb5 1995 err = ip6_route_add(&cfg);
1da177e4
LT
1996 break;
1997 case SIOCDELRT:
86872cb5 1998 err = ip6_route_del(&cfg);
1da177e4
LT
1999 break;
2000 default:
2001 err = -EINVAL;
2002 }
2003 rtnl_unlock();
2004
2005 return err;
3ff50b79 2006 }
1da177e4
LT
2007
2008 return -EINVAL;
2009}
2010
2011/*
2012 * Drop the packet on the floor
2013 */
2014
d5fdd6ba 2015static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2016{
612f09e8 2017 int type;
adf30907 2018 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2019 switch (ipstats_mib_noroutes) {
2020 case IPSTATS_MIB_INNOROUTES:
0660e03f 2021 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2022 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2023 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2024 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2025 break;
2026 }
2027 /* FALLTHROUGH */
2028 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2029 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2030 ipstats_mib_noroutes);
612f09e8
YH
2031 break;
2032 }
3ffe533c 2033 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2034 kfree_skb(skb);
2035 return 0;
2036}
2037
9ce8ade0
TG
2038static int ip6_pkt_discard(struct sk_buff *skb)
2039{
612f09e8 2040 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2041}
2042
20380731 2043static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2044{
adf30907 2045 skb->dev = skb_dst(skb)->dev;
612f09e8 2046 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2047}
2048
6723ab54
DM
2049#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2050
9ce8ade0
TG
2051static int ip6_pkt_prohibit(struct sk_buff *skb)
2052{
612f09e8 2053 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2054}
2055
2056static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2057{
adf30907 2058 skb->dev = skb_dst(skb)->dev;
612f09e8 2059 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2060}
2061
6723ab54
DM
2062#endif
2063
1da177e4
LT
2064/*
2065 * Allocate a dst for local (unicast / anycast) address.
2066 */
2067
2068struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2069 const struct in6_addr *addr,
8f031519 2070 bool anycast)
1da177e4 2071{
c346dca1 2072 struct net *net = dev_net(idev->dev);
8b96d22d 2073 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
f83c7790 2074 int err;
1da177e4 2075
38308473 2076 if (!rt) {
f3213831 2077 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
1da177e4 2078 return ERR_PTR(-ENOMEM);
40385653 2079 }
1da177e4 2080
1da177e4
LT
2081 in6_dev_hold(idev);
2082
11d53b49 2083 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2084 rt->dst.input = ip6_input;
2085 rt->dst.output = ip6_output;
1da177e4 2086 rt->rt6i_idev = idev;
d8d1f30b 2087 rt->dst.obsolete = -1;
1da177e4
LT
2088
2089 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2090 if (anycast)
2091 rt->rt6i_flags |= RTF_ANYCAST;
2092 else
1da177e4 2093 rt->rt6i_flags |= RTF_LOCAL;
8ade06c6 2094 err = rt6_bind_neighbour(rt, rt->dst.dev);
f83c7790 2095 if (err) {
d8d1f30b 2096 dst_free(&rt->dst);
f83c7790 2097 return ERR_PTR(err);
1da177e4
LT
2098 }
2099
4e3fd7a0 2100 rt->rt6i_dst.addr = *addr;
1da177e4 2101 rt->rt6i_dst.plen = 128;
5578689a 2102 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2103
d8d1f30b 2104 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2105
2106 return rt;
2107}
2108
c3968a85
DW
2109int ip6_route_get_saddr(struct net *net,
2110 struct rt6_info *rt,
b71d1d42 2111 const struct in6_addr *daddr,
c3968a85
DW
2112 unsigned int prefs,
2113 struct in6_addr *saddr)
2114{
2115 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2116 int err = 0;
2117 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2118 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2119 else
2120 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2121 daddr, prefs, saddr);
2122 return err;
2123}
2124
2125/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any device */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address being removed */
};
2131
2132static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2133{
2134 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2135 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2136 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2137
d1918542 2138 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2139 rt != net->ipv6.ip6_null_entry &&
2140 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2141 /* remove prefsrc entry */
2142 rt->rt6i_prefsrc.plen = 0;
2143 }
2144 return 0;
2145}
2146
2147void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2148{
2149 struct net *net = dev_net(ifp->idev->dev);
2150 struct arg_dev_net_ip adni = {
2151 .dev = ifp->idev->dev,
2152 .net = net,
2153 .addr = &ifp->addr,
2154 };
2155 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2156}
2157
8ed67789
DL
/* Argument bundle handed to fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL matches all */
	struct net *net;		/* namespace whose null entry is kept */
};
2162
1da177e4
LT
2163static int fib6_ifdown(struct rt6_info *rt, void *arg)
2164{
bc3ef660 2165 const struct arg_dev_net *adn = arg;
2166 const struct net_device *dev = adn->dev;
8ed67789 2167
d1918542 2168 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2169 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2170 return -1;
c159d30c 2171
1da177e4
LT
2172 return 0;
2173}
2174
f3db4851 2175void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2176{
8ed67789
DL
2177 struct arg_dev_net adn = {
2178 .dev = dev,
2179 .net = net,
2180 };
2181
2182 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2183 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2184}
2185
/* Argument bundle handed to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2190
2191static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2192{
2193 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2194 struct inet6_dev *idev;
2195
2196 /* In IPv6 pmtu discovery is not optional,
2197 so that RTAX_MTU lock cannot disable it.
2198 We still use this lock to block changes
2199 caused by addrconf/ndisc.
2200 */
2201
2202 idev = __in6_dev_get(arg->dev);
38308473 2203 if (!idev)
1da177e4
LT
2204 return 0;
2205
2206 /* For administrative MTU increase, there is no way to discover
2207 IPv6 PMTU increase, so PMTU increase should be updated here.
2208 Since RFC 1981 doesn't include administrative MTU increase
2209 update PMTU increase is a MUST. (i.e. jumbo frame)
2210 */
2211 /*
2212 If new MTU is less than route PMTU, this new MTU will be the
2213 lowest MTU in the path, update the route PMTU to reflect PMTU
2214 decreases; if new MTU is greater than route PMTU, and the
2215 old MTU is the lowest MTU in the path, update the route PMTU
2216 to reflect the increase. In this case if the other nodes' MTU
2217 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2218 PMTU discouvery.
2219 */
d1918542 2220 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2221 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2222 (dst_mtu(&rt->dst) >= arg->mtu ||
2223 (dst_mtu(&rt->dst) < arg->mtu &&
2224 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2225 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2226 }
1da177e4
LT
2227 return 0;
2228}
2229
95c96174 2230void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2231{
c71099ac
TG
2232 struct rt6_mtu_change_arg arg = {
2233 .dev = dev,
2234 .mtu = mtu,
2235 };
1da177e4 2236
c346dca1 2237 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2238}
2239
ef7c79ed 2240static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2241 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2242 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2243 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2244 [RTA_PRIORITY] = { .type = NLA_U32 },
2245 [RTA_METRICS] = { .type = NLA_NESTED },
2246};
2247
2248static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2249 struct fib6_config *cfg)
1da177e4 2250{
86872cb5
TG
2251 struct rtmsg *rtm;
2252 struct nlattr *tb[RTA_MAX+1];
2253 int err;
1da177e4 2254
86872cb5
TG
2255 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2256 if (err < 0)
2257 goto errout;
1da177e4 2258
86872cb5
TG
2259 err = -EINVAL;
2260 rtm = nlmsg_data(nlh);
2261 memset(cfg, 0, sizeof(*cfg));
2262
2263 cfg->fc_table = rtm->rtm_table;
2264 cfg->fc_dst_len = rtm->rtm_dst_len;
2265 cfg->fc_src_len = rtm->rtm_src_len;
2266 cfg->fc_flags = RTF_UP;
2267 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2268 cfg->fc_type = rtm->rtm_type;
86872cb5 2269
ef2c7d7b
ND
2270 if (rtm->rtm_type == RTN_UNREACHABLE ||
2271 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2272 rtm->rtm_type == RTN_PROHIBIT ||
2273 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2274 cfg->fc_flags |= RTF_REJECT;
2275
ab79ad14
2276 if (rtm->rtm_type == RTN_LOCAL)
2277 cfg->fc_flags |= RTF_LOCAL;
2278
15e47304 2279 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2280 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2281 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2282
2283 if (tb[RTA_GATEWAY]) {
2284 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2285 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2286 }
86872cb5
TG
2287
2288 if (tb[RTA_DST]) {
2289 int plen = (rtm->rtm_dst_len + 7) >> 3;
2290
2291 if (nla_len(tb[RTA_DST]) < plen)
2292 goto errout;
2293
2294 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2295 }
86872cb5
TG
2296
2297 if (tb[RTA_SRC]) {
2298 int plen = (rtm->rtm_src_len + 7) >> 3;
2299
2300 if (nla_len(tb[RTA_SRC]) < plen)
2301 goto errout;
2302
2303 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2304 }
86872cb5 2305
c3968a85
DW
2306 if (tb[RTA_PREFSRC])
2307 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2308
86872cb5
TG
2309 if (tb[RTA_OIF])
2310 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2311
2312 if (tb[RTA_PRIORITY])
2313 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2314
2315 if (tb[RTA_METRICS]) {
2316 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2317 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2318 }
86872cb5
TG
2319
2320 if (tb[RTA_TABLE])
2321 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2322
2323 err = 0;
2324errout:
2325 return err;
1da177e4
LT
2326}
2327
c127ea2c 2328static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2329{
86872cb5
TG
2330 struct fib6_config cfg;
2331 int err;
1da177e4 2332
86872cb5
TG
2333 err = rtm_to_fib6_config(skb, nlh, &cfg);
2334 if (err < 0)
2335 return err;
2336
2337 return ip6_route_del(&cfg);
1da177e4
LT
2338}
2339
c127ea2c 2340static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2341{
86872cb5
TG
2342 struct fib6_config cfg;
2343 int err;
1da177e4 2344
86872cb5
TG
2345 err = rtm_to_fib6_config(skb, nlh, &cfg);
2346 if (err < 0)
2347 return err;
2348
2349 return ip6_route_add(&cfg);
1da177e4
LT
2350}
2351
339bf98f
TG
2352static inline size_t rt6_nlmsg_size(void)
2353{
2354 return NLMSG_ALIGN(sizeof(struct rtmsg))
2355 + nla_total_size(16) /* RTA_SRC */
2356 + nla_total_size(16) /* RTA_DST */
2357 + nla_total_size(16) /* RTA_GATEWAY */
2358 + nla_total_size(16) /* RTA_PREFSRC */
2359 + nla_total_size(4) /* RTA_TABLE */
2360 + nla_total_size(4) /* RTA_IIF */
2361 + nla_total_size(4) /* RTA_OIF */
2362 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2363 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2364 + nla_total_size(sizeof(struct rta_cacheinfo));
2365}
2366
191cd582
BH
2367static int rt6_fill_node(struct net *net,
2368 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 2369 struct in6_addr *dst, struct in6_addr *src,
15e47304 2370 int iif, int type, u32 portid, u32 seq,
7bc570c8 2371 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2372{
2373 struct rtmsg *rtm;
2d7202bf 2374 struct nlmsghdr *nlh;
e3703b3d 2375 long expires;
9e762a4a 2376 u32 table;
f2c31e32 2377 struct neighbour *n;
1da177e4
LT
2378
2379 if (prefix) { /* user wants prefix routes only */
2380 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2381 /* success since this is not a prefix route */
2382 return 1;
2383 }
2384 }
2385
15e47304 2386 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 2387 if (!nlh)
26932566 2388 return -EMSGSIZE;
2d7202bf
TG
2389
2390 rtm = nlmsg_data(nlh);
1da177e4
LT
2391 rtm->rtm_family = AF_INET6;
2392 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2393 rtm->rtm_src_len = rt->rt6i_src.plen;
2394 rtm->rtm_tos = 0;
c71099ac 2395 if (rt->rt6i_table)
9e762a4a 2396 table = rt->rt6i_table->tb6_id;
c71099ac 2397 else
9e762a4a
PM
2398 table = RT6_TABLE_UNSPEC;
2399 rtm->rtm_table = table;
c78679e8
DM
2400 if (nla_put_u32(skb, RTA_TABLE, table))
2401 goto nla_put_failure;
ef2c7d7b
ND
2402 if (rt->rt6i_flags & RTF_REJECT) {
2403 switch (rt->dst.error) {
2404 case -EINVAL:
2405 rtm->rtm_type = RTN_BLACKHOLE;
2406 break;
2407 case -EACCES:
2408 rtm->rtm_type = RTN_PROHIBIT;
2409 break;
b4949ab2
ND
2410 case -EAGAIN:
2411 rtm->rtm_type = RTN_THROW;
2412 break;
ef2c7d7b
ND
2413 default:
2414 rtm->rtm_type = RTN_UNREACHABLE;
2415 break;
2416 }
2417 }
38308473 2418 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2419 rtm->rtm_type = RTN_LOCAL;
d1918542 2420 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2421 rtm->rtm_type = RTN_LOCAL;
2422 else
2423 rtm->rtm_type = RTN_UNICAST;
2424 rtm->rtm_flags = 0;
2425 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2426 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2427 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4 2428 rtm->rtm_protocol = RTPROT_REDIRECT;
f0396f60
DO
2429 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2430 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2431 rtm->rtm_protocol = RTPROT_RA;
2432 else
2433 rtm->rtm_protocol = RTPROT_KERNEL;
2434 }
1da177e4 2435
38308473 2436 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2437 rtm->rtm_flags |= RTM_F_CLONED;
2438
2439 if (dst) {
c78679e8
DM
2440 if (nla_put(skb, RTA_DST, 16, dst))
2441 goto nla_put_failure;
1ab1457c 2442 rtm->rtm_dst_len = 128;
1da177e4 2443 } else if (rtm->rtm_dst_len)
c78679e8
DM
2444 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2445 goto nla_put_failure;
1da177e4
LT
2446#ifdef CONFIG_IPV6_SUBTREES
2447 if (src) {
c78679e8
DM
2448 if (nla_put(skb, RTA_SRC, 16, src))
2449 goto nla_put_failure;
1ab1457c 2450 rtm->rtm_src_len = 128;
c78679e8
DM
2451 } else if (rtm->rtm_src_len &&
2452 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2453 goto nla_put_failure;
1da177e4 2454#endif
7bc570c8
YH
2455 if (iif) {
2456#ifdef CONFIG_IPV6_MROUTE
2457 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2458 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2459 if (err <= 0) {
2460 if (!nowait) {
2461 if (err == 0)
2462 return 0;
2463 goto nla_put_failure;
2464 } else {
2465 if (err == -EMSGSIZE)
2466 goto nla_put_failure;
2467 }
2468 }
2469 } else
2470#endif
c78679e8
DM
2471 if (nla_put_u32(skb, RTA_IIF, iif))
2472 goto nla_put_failure;
7bc570c8 2473 } else if (dst) {
1da177e4 2474 struct in6_addr saddr_buf;
c78679e8
DM
2475 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2476 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2477 goto nla_put_failure;
1da177e4 2478 }
2d7202bf 2479
c3968a85
DW
2480 if (rt->rt6i_prefsrc.plen) {
2481 struct in6_addr saddr_buf;
4e3fd7a0 2482 saddr_buf = rt->rt6i_prefsrc.addr;
c78679e8
DM
2483 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2484 goto nla_put_failure;
c3968a85
DW
2485 }
2486
defb3519 2487 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2488 goto nla_put_failure;
2489
97cac082 2490 n = rt->n;
94f826b8 2491 if (n) {
fdd6681d 2492 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
94f826b8 2493 goto nla_put_failure;
94f826b8 2494 }
2d7202bf 2495
c78679e8
DM
2496 if (rt->dst.dev &&
2497 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2498 goto nla_put_failure;
2499 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2500 goto nla_put_failure;
8253947e
LW
2501
2502 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 2503
87a50699 2504 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 2505 goto nla_put_failure;
2d7202bf
TG
2506
2507 return nlmsg_end(skb, nlh);
2508
2509nla_put_failure:
26932566
PM
2510 nlmsg_cancel(skb, nlh);
2511 return -EMSGSIZE;
1da177e4
LT
2512}
2513
1b43af54 2514int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2515{
2516 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2517 int prefix;
2518
2d7202bf
TG
2519 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2520 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2521 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2522 } else
2523 prefix = 0;
2524
191cd582
BH
2525 return rt6_fill_node(arg->net,
2526 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 2527 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2528 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2529}
2530
c127ea2c 2531static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2532{
3b1e0a65 2533 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2534 struct nlattr *tb[RTA_MAX+1];
2535 struct rt6_info *rt;
1da177e4 2536 struct sk_buff *skb;
ab364a6f 2537 struct rtmsg *rtm;
4c9483b2 2538 struct flowi6 fl6;
72331bc0 2539 int err, iif = 0, oif = 0;
1da177e4 2540
ab364a6f
TG
2541 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2542 if (err < 0)
2543 goto errout;
1da177e4 2544
ab364a6f 2545 err = -EINVAL;
4c9483b2 2546 memset(&fl6, 0, sizeof(fl6));
1da177e4 2547
ab364a6f
TG
2548 if (tb[RTA_SRC]) {
2549 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2550 goto errout;
2551
4e3fd7a0 2552 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2553 }
2554
2555 if (tb[RTA_DST]) {
2556 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2557 goto errout;
2558
4e3fd7a0 2559 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2560 }
2561
2562 if (tb[RTA_IIF])
2563 iif = nla_get_u32(tb[RTA_IIF]);
2564
2565 if (tb[RTA_OIF])
72331bc0 2566 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2567
2568 if (iif) {
2569 struct net_device *dev;
72331bc0
SL
2570 int flags = 0;
2571
5578689a 2572 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2573 if (!dev) {
2574 err = -ENODEV;
ab364a6f 2575 goto errout;
1da177e4 2576 }
72331bc0
SL
2577
2578 fl6.flowi6_iif = iif;
2579
2580 if (!ipv6_addr_any(&fl6.saddr))
2581 flags |= RT6_LOOKUP_F_HAS_SADDR;
2582
2583 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2584 flags);
2585 } else {
2586 fl6.flowi6_oif = oif;
2587
2588 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
2589 }
2590
ab364a6f 2591 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2592 if (!skb) {
2173bff5 2593 dst_release(&rt->dst);
ab364a6f
TG
2594 err = -ENOBUFS;
2595 goto errout;
2596 }
1da177e4 2597
ab364a6f
TG
2598 /* Reserve room for dummy headers, this skb can pass
2599 through good chunk of routing engine.
2600 */
459a98ed 2601 skb_reset_mac_header(skb);
ab364a6f 2602 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2603
d8d1f30b 2604 skb_dst_set(skb, &rt->dst);
1da177e4 2605
4c9483b2 2606 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
15e47304 2607 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
7bc570c8 2608 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2609 if (err < 0) {
ab364a6f
TG
2610 kfree_skb(skb);
2611 goto errout;
1da177e4
LT
2612 }
2613
15e47304 2614 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 2615errout:
1da177e4 2616 return err;
1da177e4
LT
2617}
2618
86872cb5 2619void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2620{
2621 struct sk_buff *skb;
5578689a 2622 struct net *net = info->nl_net;
528c4ceb
DL
2623 u32 seq;
2624 int err;
2625
2626 err = -ENOBUFS;
38308473 2627 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2628
339bf98f 2629 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2630 if (!skb)
21713ebc
TG
2631 goto errout;
2632
191cd582 2633 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
15e47304 2634 event, info->portid, seq, 0, 0, 0);
26932566
PM
2635 if (err < 0) {
2636 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2637 WARN_ON(err == -EMSGSIZE);
2638 kfree_skb(skb);
2639 goto errout;
2640 }
15e47304 2641 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
2642 info->nlh, gfp_any());
2643 return;
21713ebc
TG
2644errout:
2645 if (err < 0)
5578689a 2646 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2647}
2648
8ed67789
DL
2649static int ip6_route_dev_notify(struct notifier_block *this,
2650 unsigned long event, void *data)
2651{
2652 struct net_device *dev = (struct net_device *)data;
c346dca1 2653 struct net *net = dev_net(dev);
8ed67789
DL
2654
2655 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2656 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2657 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2658#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2659 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2660 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2661 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2662 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2663#endif
2664 }
2665
2666 return NOTIFY_OK;
2667}
2668
1da177e4
LT
2669/*
2670 * /proc
2671 */
2672
2673#ifdef CONFIG_PROC_FS
2674
1da177e4
LT
/*
 * NOTE(review): nothing in the /proc code visible nearby references this
 * structure (the handlers use seq_file) -- confirm whether it is still
 * needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2683
2684static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2685{
33120b30 2686 struct seq_file *m = p_arg;
69cce1d1 2687 struct neighbour *n;
1da177e4 2688
4b7a4274 2689 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2690
2691#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2692 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2693#else
33120b30 2694 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2695#endif
97cac082 2696 n = rt->n;
69cce1d1
DM
2697 if (n) {
2698 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2699 } else {
33120b30 2700 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2701 }
33120b30 2702 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2703 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2704 rt->dst.__use, rt->rt6i_flags,
d1918542 2705 rt->dst.dev ? rt->dst.dev->name : "");
1da177e4
LT
2706 return 0;
2707}
2708
33120b30 2709static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2710{
f3db4851 2711 struct net *net = (struct net *)m->private;
32b293a5 2712 fib6_clean_all_ro(net, rt6_info_route, 0, m);
33120b30
AD
2713 return 0;
2714}
1da177e4 2715
33120b30
AD
2716static int ipv6_route_open(struct inode *inode, struct file *file)
2717{
de05c557 2718 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2719}
2720
33120b30
AD
2721static const struct file_operations ipv6_route_proc_fops = {
2722 .owner = THIS_MODULE,
2723 .open = ipv6_route_open,
2724 .read = seq_read,
2725 .llseek = seq_lseek,
b6fcbdb4 2726 .release = single_release_net,
33120b30
AD
2727};
2728
1da177e4
LT
2729static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2730{
69ddb805 2731 struct net *net = (struct net *)seq->private;
1da177e4 2732 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2733 net->ipv6.rt6_stats->fib_nodes,
2734 net->ipv6.rt6_stats->fib_route_nodes,
2735 net->ipv6.rt6_stats->fib_rt_alloc,
2736 net->ipv6.rt6_stats->fib_rt_entries,
2737 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2738 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2739 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2740
2741 return 0;
2742}
2743
2744static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2745{
de05c557 2746 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2747}
2748
9a32144e 2749static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2750 .owner = THIS_MODULE,
2751 .open = rt6_stats_seq_open,
2752 .read = seq_read,
2753 .llseek = seq_lseek,
b6fcbdb4 2754 .release = single_release_net,
1da177e4
LT
2755};
2756#endif /* CONFIG_PROC_FS */
2757
2758#ifdef CONFIG_SYSCTL
2759
1da177e4 2760static
8d65af78 2761int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2762 void __user *buffer, size_t *lenp, loff_t *ppos)
2763{
c486da34
LAG
2764 struct net *net;
2765 int delay;
2766 if (!write)
1da177e4 2767 return -EINVAL;
c486da34
LAG
2768
2769 net = (struct net *)ctl->extra1;
2770 delay = net->ipv6.sysctl.flush_delay;
2771 proc_dointvec(ctl, write, buffer, lenp, ppos);
2772 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2773 return 0;
1da177e4
LT
2774}
2775
760f2d01 2776ctl_table ipv6_route_table_template[] = {
1ab1457c 2777 {
1da177e4 2778 .procname = "flush",
4990509f 2779 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2780 .maxlen = sizeof(int),
89c8b3a1 2781 .mode = 0200,
6d9f239a 2782 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2783 },
2784 {
1da177e4 2785 .procname = "gc_thresh",
9a7ec3a9 2786 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2787 .maxlen = sizeof(int),
2788 .mode = 0644,
6d9f239a 2789 .proc_handler = proc_dointvec,
1da177e4
LT
2790 },
2791 {
1da177e4 2792 .procname = "max_size",
4990509f 2793 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2794 .maxlen = sizeof(int),
2795 .mode = 0644,
6d9f239a 2796 .proc_handler = proc_dointvec,
1da177e4
LT
2797 },
2798 {
1da177e4 2799 .procname = "gc_min_interval",
4990509f 2800 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2801 .maxlen = sizeof(int),
2802 .mode = 0644,
6d9f239a 2803 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2804 },
2805 {
1da177e4 2806 .procname = "gc_timeout",
4990509f 2807 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2808 .maxlen = sizeof(int),
2809 .mode = 0644,
6d9f239a 2810 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2811 },
2812 {
1da177e4 2813 .procname = "gc_interval",
4990509f 2814 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2815 .maxlen = sizeof(int),
2816 .mode = 0644,
6d9f239a 2817 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2818 },
2819 {
1da177e4 2820 .procname = "gc_elasticity",
4990509f 2821 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2822 .maxlen = sizeof(int),
2823 .mode = 0644,
f3d3f616 2824 .proc_handler = proc_dointvec,
1da177e4
LT
2825 },
2826 {
1da177e4 2827 .procname = "mtu_expires",
4990509f 2828 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2829 .maxlen = sizeof(int),
2830 .mode = 0644,
6d9f239a 2831 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2832 },
2833 {
1da177e4 2834 .procname = "min_adv_mss",
4990509f 2835 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2836 .maxlen = sizeof(int),
2837 .mode = 0644,
f3d3f616 2838 .proc_handler = proc_dointvec,
1da177e4
LT
2839 },
2840 {
1da177e4 2841 .procname = "gc_min_interval_ms",
4990509f 2842 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2843 .maxlen = sizeof(int),
2844 .mode = 0644,
6d9f239a 2845 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2846 },
f8572d8f 2847 { }
1da177e4
LT
2848};
2849
2c8c1e72 2850struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2851{
2852 struct ctl_table *table;
2853
2854 table = kmemdup(ipv6_route_table_template,
2855 sizeof(ipv6_route_table_template),
2856 GFP_KERNEL);
5ee09105
YH
2857
2858 if (table) {
2859 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2860 table[0].extra1 = net;
86393e52 2861 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2862 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2863 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2864 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2865 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2866 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2867 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2868 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2869 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2870 }
2871
760f2d01
DL
2872 return table;
2873}
1da177e4
LT
2874#endif
2875
2c8c1e72 2876static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2877{
633d424b 2878 int ret = -ENOMEM;
8ed67789 2879
86393e52
AD
2880 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2881 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2882
fc66f95c
ED
2883 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2884 goto out_ip6_dst_ops;
2885
8ed67789
DL
2886 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2887 sizeof(*net->ipv6.ip6_null_entry),
2888 GFP_KERNEL);
2889 if (!net->ipv6.ip6_null_entry)
fc66f95c 2890 goto out_ip6_dst_entries;
d8d1f30b 2891 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2892 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2893 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2894 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2895 ip6_template_metrics, true);
8ed67789
DL
2896
2897#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2898 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2899 sizeof(*net->ipv6.ip6_prohibit_entry),
2900 GFP_KERNEL);
68fffc67
PZ
2901 if (!net->ipv6.ip6_prohibit_entry)
2902 goto out_ip6_null_entry;
d8d1f30b 2903 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2904 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2905 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2906 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2907 ip6_template_metrics, true);
8ed67789
DL
2908
2909 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2910 sizeof(*net->ipv6.ip6_blk_hole_entry),
2911 GFP_KERNEL);
68fffc67
PZ
2912 if (!net->ipv6.ip6_blk_hole_entry)
2913 goto out_ip6_prohibit_entry;
d8d1f30b 2914 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2915 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2916 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2917 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2918 ip6_template_metrics, true);
8ed67789
DL
2919#endif
2920
b339a47c
PZ
2921 net->ipv6.sysctl.flush_delay = 0;
2922 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2923 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2924 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2925 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2926 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2927 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2928 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2929
6891a346
BT
2930 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2931
8ed67789
DL
2932 ret = 0;
2933out:
2934 return ret;
f2fc6a54 2935
68fffc67
PZ
2936#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2937out_ip6_prohibit_entry:
2938 kfree(net->ipv6.ip6_prohibit_entry);
2939out_ip6_null_entry:
2940 kfree(net->ipv6.ip6_null_entry);
2941#endif
fc66f95c
ED
2942out_ip6_dst_entries:
2943 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2944out_ip6_dst_ops:
f2fc6a54 2945 goto out;
cdb18761
DL
2946}
2947
2c8c1e72 2948static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 2949{
8ed67789
DL
2950 kfree(net->ipv6.ip6_null_entry);
2951#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2952 kfree(net->ipv6.ip6_prohibit_entry);
2953 kfree(net->ipv6.ip6_blk_hole_entry);
2954#endif
41bb78b4 2955 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2956}
2957
d189634e
TG
2958static int __net_init ip6_route_net_init_late(struct net *net)
2959{
2960#ifdef CONFIG_PROC_FS
2961 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2962 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2963#endif
2964 return 0;
2965}
2966
2967static void __net_exit ip6_route_net_exit_late(struct net *net)
2968{
2969#ifdef CONFIG_PROC_FS
2970 proc_net_remove(net, "ipv6_route");
2971 proc_net_remove(net, "rt6_stats");
2972#endif
2973}
2974
cdb18761
DL
2975static struct pernet_operations ip6_route_net_ops = {
2976 .init = ip6_route_net_init,
2977 .exit = ip6_route_net_exit,
2978};
2979
c3426b47
DM
2980static int __net_init ipv6_inetpeer_init(struct net *net)
2981{
2982 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2983
2984 if (!bp)
2985 return -ENOMEM;
2986 inet_peer_base_init(bp);
2987 net->ipv6.peers = bp;
2988 return 0;
2989}
2990
2991static void __net_exit ipv6_inetpeer_exit(struct net *net)
2992{
2993 struct inet_peer_base *bp = net->ipv6.peers;
2994
2995 net->ipv6.peers = NULL;
56a6b248 2996 inetpeer_invalidate_tree(bp);
c3426b47
DM
2997 kfree(bp);
2998}
2999
2b823f72 3000static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3001 .init = ipv6_inetpeer_init,
3002 .exit = ipv6_inetpeer_exit,
3003};
3004
d189634e
TG
3005static struct pernet_operations ip6_route_net_late_ops = {
3006 .init = ip6_route_net_init_late,
3007 .exit = ip6_route_net_exit_late,
3008};
3009
8ed67789
DL
3010static struct notifier_block ip6_route_dev_notifier = {
3011 .notifier_call = ip6_route_dev_notify,
3012 .priority = 0,
3013};
3014
433d49c3 3015int __init ip6_route_init(void)
1da177e4 3016{
433d49c3
DL
3017 int ret;
3018
9a7ec3a9
DL
3019 ret = -ENOMEM;
3020 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3021 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3022 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3023 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3024 goto out;
14e50e57 3025
fc66f95c 3026 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3027 if (ret)
bdb3289f 3028 goto out_kmem_cache;
bdb3289f 3029
c3426b47
DM
3030 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3031 if (ret)
e8803b6c 3032 goto out_dst_entries;
2a0c451a 3033
7e52b33b
DM
3034 ret = register_pernet_subsys(&ip6_route_net_ops);
3035 if (ret)
3036 goto out_register_inetpeer;
c3426b47 3037
5dc121e9
AE
3038 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3039
8ed67789
DL
3040 /* Registering of the loopback is done before this portion of code,
3041 * the loopback reference in rt6_info will not be taken, do it
3042 * manually for init_net */
d8d1f30b 3043 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3044 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3045 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3046 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3047 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3048 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3049 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3050 #endif
e8803b6c 3051 ret = fib6_init();
433d49c3 3052 if (ret)
8ed67789 3053 goto out_register_subsys;
433d49c3 3054
433d49c3
DL
3055 ret = xfrm6_init();
3056 if (ret)
e8803b6c 3057 goto out_fib6_init;
c35b7e72 3058
433d49c3
DL
3059 ret = fib6_rules_init();
3060 if (ret)
3061 goto xfrm6_init;
7e5449c2 3062
d189634e
TG
3063 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3064 if (ret)
3065 goto fib6_rules_init;
3066
433d49c3 3067 ret = -ENOBUFS;
c7ac8679
GR
3068 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3069 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3070 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3071 goto out_register_late_subsys;
c127ea2c 3072
8ed67789 3073 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3074 if (ret)
d189634e 3075 goto out_register_late_subsys;
8ed67789 3076
433d49c3
DL
3077out:
3078 return ret;
3079
d189634e
TG
3080out_register_late_subsys:
3081 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3082fib6_rules_init:
433d49c3
DL
3083 fib6_rules_cleanup();
3084xfrm6_init:
433d49c3 3085 xfrm6_fini();
2a0c451a
TG
3086out_fib6_init:
3087 fib6_gc_cleanup();
8ed67789
DL
3088out_register_subsys:
3089 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3090out_register_inetpeer:
3091 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3092out_dst_entries:
3093 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3094out_kmem_cache:
f2fc6a54 3095 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3096 goto out;
1da177e4
LT
3097}
3098
3099void ip6_route_cleanup(void)
3100{
8ed67789 3101 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3102 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3103 fib6_rules_cleanup();
1da177e4 3104 xfrm6_fini();
1da177e4 3105 fib6_gc_cleanup();
c3426b47 3106 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3107 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3108 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3109 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3110}