]> git.ipfire.org Git - people/ms/linux.git/blame - net/ipv6/route.c
Merge branch 'vhost-net-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mst...
[people/ms/linux.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
7bc570c8 37#include <linux/mroute6.h>
1da177e4 38#include <linux/init.h>
1da177e4 39#include <linux/if_arp.h>
1da177e4
LT
40#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
5b7c931d 42#include <linux/nsproxy.h>
5a0e3ad6 43#include <linux/slab.h>
457c4cbc 44#include <net/net_namespace.h>
1da177e4
LT
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
8d71740c 55#include <net/netevent.h>
21713ebc 56#include <net/netlink.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/* Debug level for this file; set to 3 to get tracing. */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
/* Tracing disabled: both helpers expand to nothing. */
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
/* Tracing enabled: forward to printk(). */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/*
 * Compile-time switch read by ip6_pol_route(): when non-zero the
 * rt6_alloc_clone() path is compiled in, when zero that branch jumps
 * straight to the exit label instead.
 */
#define CLONE_OFFLINK_ROUTE 0
1da177e4 76
1da177e4
LT
77static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 79static unsigned int ip6_default_advmss(const struct dst_entry *dst);
1da177e4
LT
80static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81static void ip6_dst_destroy(struct dst_entry *);
82static void ip6_dst_ifdown(struct dst_entry *,
83 struct net_device *dev, int how);
569d3645 84static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
85
86static int ip6_pkt_discard(struct sk_buff *skb);
87static int ip6_pkt_discard_out(struct sk_buff *skb);
88static void ip6_link_failure(struct sk_buff *skb);
89static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
90
70ceb4f5 91#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
92static struct rt6_info *rt6_add_route_info(struct net *net,
93 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
94 struct in6_addr *gwaddr, int ifindex,
95 unsigned pref);
efa2cea0
DL
96static struct rt6_info *rt6_get_route_info(struct net *net,
97 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
98 struct in6_addr *gwaddr, int ifindex);
99#endif
100
9a7ec3a9 101static struct dst_ops ip6_dst_ops_template = {
1da177e4 102 .family = AF_INET6,
09640e63 103 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
104 .gc = ip6_dst_gc,
105 .gc_thresh = 1024,
106 .check = ip6_dst_check,
0dbaee3b 107 .default_advmss = ip6_default_advmss,
1da177e4
LT
108 .destroy = ip6_dst_destroy,
109 .ifdown = ip6_dst_ifdown,
110 .negative_advice = ip6_negative_advice,
111 .link_failure = ip6_link_failure,
112 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 113 .local_out = __ip6_local_out,
1da177e4
LT
114};
115
14e50e57
DM
116static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
117{
118}
119
120static struct dst_ops ip6_dst_blackhole_ops = {
121 .family = AF_INET6,
09640e63 122 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
123 .destroy = ip6_dst_destroy,
124 .check = ip6_dst_check,
125 .update_pmtu = ip6_rt_blackhole_update_pmtu,
14e50e57
DM
126};
127
bdb3289f 128static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
129 .dst = {
130 .__refcnt = ATOMIC_INIT(1),
131 .__use = 1,
132 .obsolete = -1,
133 .error = -ENETUNREACH,
d8d1f30b
CG
134 .input = ip6_pkt_discard,
135 .output = ip6_pkt_discard_out,
1da177e4
LT
136 },
137 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 138 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
139 .rt6i_metric = ~(u32) 0,
140 .rt6i_ref = ATOMIC_INIT(1),
141};
142
101367c2
TG
143#ifdef CONFIG_IPV6_MULTIPLE_TABLES
144
6723ab54
DM
145static int ip6_pkt_prohibit(struct sk_buff *skb);
146static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 147
280a34c8 148static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
149 .dst = {
150 .__refcnt = ATOMIC_INIT(1),
151 .__use = 1,
152 .obsolete = -1,
153 .error = -EACCES,
d8d1f30b
CG
154 .input = ip6_pkt_prohibit,
155 .output = ip6_pkt_prohibit_out,
101367c2
TG
156 },
157 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 158 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
159 .rt6i_metric = ~(u32) 0,
160 .rt6i_ref = ATOMIC_INIT(1),
161};
162
bdb3289f 163static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
164 .dst = {
165 .__refcnt = ATOMIC_INIT(1),
166 .__use = 1,
167 .obsolete = -1,
168 .error = -EINVAL,
d8d1f30b
CG
169 .input = dst_discard,
170 .output = dst_discard,
101367c2
TG
171 },
172 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 173 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
174 .rt6i_metric = ~(u32) 0,
175 .rt6i_ref = ATOMIC_INIT(1),
176};
177
178#endif
179
/*
 * Allocate a dst entry from @ops and return it typed as an rt6_info.
 * Returns whatever dst_alloc() returned, reinterpreted; callers in this
 * file check the result against NULL.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct dst_entry *entry = dst_alloc(ops);

	return (struct rt6_info *)entry;
}
185
186static void ip6_dst_destroy(struct dst_entry *dst)
187{
188 struct rt6_info *rt = (struct rt6_info *)dst;
189 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 190 struct inet_peer *peer = rt->rt6i_peer;
1da177e4
LT
191
192 if (idev != NULL) {
193 rt->rt6i_idev = NULL;
194 in6_dev_put(idev);
1ab1457c 195 }
b3419363
DM
196 if (peer) {
197 BUG_ON(!(rt->rt6i_flags & RTF_CACHE));
198 rt->rt6i_peer = NULL;
199 inet_putpeer(peer);
200 }
201}
202
203void rt6_bind_peer(struct rt6_info *rt, int create)
204{
205 struct inet_peer *peer;
206
207 if (WARN_ON(!(rt->rt6i_flags & RTF_CACHE)))
208 return;
209
210 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
211 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
212 inet_putpeer(peer);
1da177e4
LT
213}
214
215static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
216 int how)
217{
218 struct rt6_info *rt = (struct rt6_info *)dst;
219 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 220 struct net_device *loopback_dev =
c346dca1 221 dev_net(dev)->loopback_dev;
1da177e4 222
5a3e55d6
DL
223 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
224 struct inet6_dev *loopback_idev =
225 in6_dev_get(loopback_dev);
1da177e4
LT
226 if (loopback_idev != NULL) {
227 rt->rt6i_idev = loopback_idev;
228 in6_dev_put(idev);
229 }
230 }
231}
232
233static __inline__ int rt6_check_expired(const struct rt6_info *rt)
234{
a02cec21
ED
235 return (rt->rt6i_flags & RTF_EXPIRES) &&
236 time_after(jiffies, rt->rt6i_expires);
1da177e4
LT
237}
238
c71099ac
TG
239static inline int rt6_need_strict(struct in6_addr *daddr)
240{
a02cec21
ED
241 return ipv6_addr_type(daddr) &
242 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
243}
244
1da177e4 245/*
c71099ac 246 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
247 */
248
8ed67789
DL
249static inline struct rt6_info *rt6_device_match(struct net *net,
250 struct rt6_info *rt,
dd3abc4e 251 struct in6_addr *saddr,
1da177e4 252 int oif,
d420895e 253 int flags)
1da177e4
LT
254{
255 struct rt6_info *local = NULL;
256 struct rt6_info *sprt;
257
dd3abc4e
YH
258 if (!oif && ipv6_addr_any(saddr))
259 goto out;
260
d8d1f30b 261 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
dd3abc4e
YH
262 struct net_device *dev = sprt->rt6i_dev;
263
264 if (oif) {
1da177e4
LT
265 if (dev->ifindex == oif)
266 return sprt;
267 if (dev->flags & IFF_LOOPBACK) {
268 if (sprt->rt6i_idev == NULL ||
269 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 270 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 271 continue;
1ab1457c 272 if (local && (!oif ||
1da177e4
LT
273 local->rt6i_idev->dev->ifindex == oif))
274 continue;
275 }
276 local = sprt;
277 }
dd3abc4e
YH
278 } else {
279 if (ipv6_chk_addr(net, saddr, dev,
280 flags & RT6_LOOKUP_F_IFACE))
281 return sprt;
1da177e4 282 }
dd3abc4e 283 }
1da177e4 284
dd3abc4e 285 if (oif) {
1da177e4
LT
286 if (local)
287 return local;
288
d420895e 289 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 290 return net->ipv6.ip6_null_entry;
1da177e4 291 }
dd3abc4e 292out:
1da177e4
LT
293 return rt;
294}
295
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a neighbour solicitation towards the
 * next hop of @rt when its neighbour entry is not in a NUD_VALID state and
 * the last update is older than the interface's rtr_probe_interval.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute; the rate limit is implemented through neigh->updated.
 *
 * @rt may be NULL, in which case nothing happens.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int probe_due;

	/* cheap unlocked pre-check */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	probe_due = !(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (probe_due) {
		/* NOTE(review): written while holding only the read lock */
		neigh->updated = jiffies;
	}
	read_unlock_bh(&neigh->lock);

	if (!probe_due)
		return;

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
330
1da177e4 331/*
554cfb7e 332 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 333 */
b6f99a21 334static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
335{
336 struct net_device *dev = rt->rt6i_dev;
161980f4 337 if (!oif || dev->ifindex == oif)
554cfb7e 338 return 2;
161980f4
DM
339 if ((dev->flags & IFF_LOOPBACK) &&
340 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
341 return 1;
342 return 0;
554cfb7e 343}
1da177e4 344
b6f99a21 345static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 346{
554cfb7e 347 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 348 int m;
4d0c5911
YH
349 if (rt->rt6i_flags & RTF_NONEXTHOP ||
350 !(rt->rt6i_flags & RTF_GATEWAY))
351 m = 1;
352 else if (neigh) {
554cfb7e
YH
353 read_lock_bh(&neigh->lock);
354 if (neigh->nud_state & NUD_VALID)
4d0c5911 355 m = 2;
398bcbeb
YH
356#ifdef CONFIG_IPV6_ROUTER_PREF
357 else if (neigh->nud_state & NUD_FAILED)
358 m = 0;
359#endif
360 else
ea73ee23 361 m = 1;
554cfb7e 362 read_unlock_bh(&neigh->lock);
398bcbeb
YH
363 } else
364 m = 0;
554cfb7e 365 return m;
1da177e4
LT
366}
367
554cfb7e
YH
368static int rt6_score_route(struct rt6_info *rt, int oif,
369 int strict)
1da177e4 370{
4d0c5911 371 int m, n;
1ab1457c 372
4d0c5911 373 m = rt6_check_dev(rt, oif);
77d16f45 374 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 375 return -1;
ebacaaa0
YH
376#ifdef CONFIG_IPV6_ROUTER_PREF
377 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
378#endif
4d0c5911 379 n = rt6_check_neigh(rt);
557e92ef 380 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
381 return -1;
382 return m;
383}
384
f11e6659
DM
385static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
386 int *mpri, struct rt6_info *match)
554cfb7e 387{
f11e6659
DM
388 int m;
389
390 if (rt6_check_expired(rt))
391 goto out;
392
393 m = rt6_score_route(rt, oif, strict);
394 if (m < 0)
395 goto out;
396
397 if (m > *mpri) {
398 if (strict & RT6_LOOKUP_F_REACHABLE)
399 rt6_probe(match);
400 *mpri = m;
401 match = rt;
402 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
403 rt6_probe(rt);
404 }
405
406out:
407 return match;
408}
409
410static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
411 struct rt6_info *rr_head,
412 u32 metric, int oif, int strict)
413{
414 struct rt6_info *rt, *match;
554cfb7e 415 int mpri = -1;
1da177e4 416
f11e6659
DM
417 match = NULL;
418 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 419 rt = rt->dst.rt6_next)
f11e6659
DM
420 match = find_match(rt, oif, strict, &mpri, match);
421 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 422 rt = rt->dst.rt6_next)
f11e6659 423 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 424
f11e6659
DM
425 return match;
426}
1da177e4 427
f11e6659
DM
428static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
429{
430 struct rt6_info *match, *rt0;
8ed67789 431 struct net *net;
1da177e4 432
f11e6659 433 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 434 __func__, fn->leaf, oif);
554cfb7e 435
f11e6659
DM
436 rt0 = fn->rr_ptr;
437 if (!rt0)
438 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 439
f11e6659 440 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 441
554cfb7e 442 if (!match &&
f11e6659 443 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 444 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 445
554cfb7e 446 /* no entries matched; do round-robin */
f11e6659
DM
447 if (!next || next->rt6i_metric != rt0->rt6i_metric)
448 next = fn->leaf;
449
450 if (next != rt0)
451 fn->rr_ptr = next;
1da177e4 452 }
1da177e4 453
f11e6659 454 RT6_TRACE("%s() => %p\n",
0dc47877 455 __func__, match);
1da177e4 456
c346dca1 457 net = dev_net(rt0->rt6i_dev);
a02cec21 458 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
459}
460
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Looks up the route for the advertised prefix via @gwaddr. A zero
 * lifetime deletes an existing route; a non-zero lifetime creates a
 * missing one or refreshes the preference bits of an existing one. The
 * expiry is then set from the lifetime (or cleared for an infinite one).
 *
 * Returns 0 on success, -EINVAL when the option fails validation.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;
	int min_length;

	/* NOTE(review): signed/unsigned compare; a negative len would pass */
	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;

	/*
	 * NOTE(review): RFC 4191 asks for length 3 when prefix_len > 64 and
	 * length >= 2 when prefix_len > 0; the limits below are one lower.
	 * Kept as found - confirm before tightening.
	 */
	if (rinfo->prefix_len > 64)
		min_length = 2;
	else if (rinfo->prefix_len > 0)
		min_length = 1;
	else
		min_length = 0;
	if (rinfo->length < min_length)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* full 128-bit prefix present in the option: use it in place */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		/* zero lifetime withdraws the route */
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	} else if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
534
/*
 * BACKTRACK(__net, saddr) - retry a failed lookup further up the tree.
 *
 * Must be expanded in a function that has the locals `rt` and `fn` and the
 * labels `restart` and `out`. It acts only when `rt` is the namespace's
 * ip6_null_entry: it climbs from `fn` towards the root, descending into a
 * parent's source subtree (looked up with @saddr) when there is one that
 * is not the node just left. It jumps to `restart` at the first node
 * carrying RTN_RTINFO, or to `out` when a node flagged RTN_TL_ROOT is
 * reached.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == __net->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (!FIB6_SUBTREE(pn) || FIB6_SUBTREE(pn) == fn) \
				fn = pn; \
			else \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
c71099ac 552
8ed67789
DL
553static struct rt6_info *ip6_pol_route_lookup(struct net *net,
554 struct fib6_table *table,
c71099ac 555 struct flowi *fl, int flags)
1da177e4
LT
556{
557 struct fib6_node *fn;
558 struct rt6_info *rt;
559
c71099ac
TG
560 read_lock_bh(&table->tb6_lock);
561 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
562restart:
563 rt = fn->leaf;
dd3abc4e 564 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
8ed67789 565 BACKTRACK(net, &fl->fl6_src);
c71099ac 566out:
d8d1f30b 567 dst_use(&rt->dst, jiffies);
c71099ac 568 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
569 return rt;
570
571}
572
9acd9f3a
YH
573struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
574 const struct in6_addr *saddr, int oif, int strict)
c71099ac
TG
575{
576 struct flowi fl = {
577 .oif = oif,
5811662b 578 .fl6_dst = *daddr,
c71099ac
TG
579 };
580 struct dst_entry *dst;
77d16f45 581 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 582
adaa70bb
TG
583 if (saddr) {
584 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
585 flags |= RT6_LOOKUP_F_HAS_SADDR;
586 }
587
606a2b48 588 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
c71099ac
TG
589 if (dst->error == 0)
590 return (struct rt6_info *) dst;
591
592 dst_release(dst);
593
1da177e4
LT
594 return NULL;
595}
596
7159039a
YH
597EXPORT_SYMBOL(rt6_lookup);
598
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to it, it may be destroyed.
 */
604
86872cb5 605static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
606{
607 int err;
c71099ac 608 struct fib6_table *table;
1da177e4 609
c71099ac
TG
610 table = rt->rt6i_table;
611 write_lock_bh(&table->tb6_lock);
86872cb5 612 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 613 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
614
615 return err;
616}
617
40e22e8f
TG
618int ip6_ins_rt(struct rt6_info *rt)
619{
4d1169c1 620 struct nl_info info = {
c346dca1 621 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 622 };
528c4ceb 623 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
624}
625
95a9a5ba
YH
626static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
627 struct in6_addr *saddr)
1da177e4 628{
1da177e4
LT
629 struct rt6_info *rt;
630
631 /*
632 * Clone the route.
633 */
634
635 rt = ip6_rt_copy(ort);
636
637 if (rt) {
14deae41
DM
638 struct neighbour *neigh;
639 int attempts = !in_softirq();
640
58c4fb86
YH
641 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
642 if (rt->rt6i_dst.plen != 128 &&
643 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
644 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 645 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 646 }
1da177e4 647
58c4fb86 648 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
649 rt->rt6i_dst.plen = 128;
650 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 651 rt->dst.flags |= DST_HOST;
1da177e4
LT
652
653#ifdef CONFIG_IPV6_SUBTREES
654 if (rt->rt6i_src.plen && saddr) {
655 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
656 rt->rt6i_src.plen = 128;
657 }
658#endif
659
14deae41
DM
660 retry:
661 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
662 if (IS_ERR(neigh)) {
663 struct net *net = dev_net(rt->rt6i_dev);
664 int saved_rt_min_interval =
665 net->ipv6.sysctl.ip6_rt_gc_min_interval;
666 int saved_rt_elasticity =
667 net->ipv6.sysctl.ip6_rt_gc_elasticity;
668
669 if (attempts-- > 0) {
670 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
671 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
672
86393e52 673 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
674
675 net->ipv6.sysctl.ip6_rt_gc_elasticity =
676 saved_rt_elasticity;
677 net->ipv6.sysctl.ip6_rt_gc_min_interval =
678 saved_rt_min_interval;
679 goto retry;
680 }
681
682 if (net_ratelimit())
683 printk(KERN_WARNING
7e1b33e5 684 "ipv6: Neighbour table overflow.\n");
d8d1f30b 685 dst_free(&rt->dst);
14deae41
DM
686 return NULL;
687 }
688 rt->rt6i_nexthop = neigh;
1da177e4 689
95a9a5ba 690 }
1da177e4 691
95a9a5ba
YH
692 return rt;
693}
1da177e4 694
299d9939
YH
695static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
696{
697 struct rt6_info *rt = ip6_rt_copy(ort);
698 if (rt) {
699 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
700 rt->rt6i_dst.plen = 128;
701 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 702 rt->dst.flags |= DST_HOST;
299d9939
YH
703 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
704 }
705 return rt;
706}
707
8ed67789
DL
708static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
709 struct flowi *fl, int flags)
1da177e4
LT
710{
711 struct fib6_node *fn;
519fbd87 712 struct rt6_info *rt, *nrt;
c71099ac 713 int strict = 0;
1da177e4 714 int attempts = 3;
519fbd87 715 int err;
53b7997f 716 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 717
77d16f45 718 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
719
720relookup:
c71099ac 721 read_lock_bh(&table->tb6_lock);
1da177e4 722
8238dd06 723restart_2:
c71099ac 724 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
725
726restart:
4acad72d 727 rt = rt6_select(fn, oif, strict | reachable);
8ed67789
DL
728
729 BACKTRACK(net, &fl->fl6_src);
730 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 731 rt->rt6i_flags & RTF_CACHE)
1ddef044 732 goto out;
1da177e4 733
d8d1f30b 734 dst_hold(&rt->dst);
c71099ac 735 read_unlock_bh(&table->tb6_lock);
fb9de91e 736
519fbd87 737 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 738 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
739 else {
740#if CLONE_OFFLINK_ROUTE
c71099ac 741 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
519fbd87
YH
742#else
743 goto out2;
744#endif
745 }
e40cf353 746
d8d1f30b 747 dst_release(&rt->dst);
8ed67789 748 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 749
d8d1f30b 750 dst_hold(&rt->dst);
519fbd87 751 if (nrt) {
40e22e8f 752 err = ip6_ins_rt(nrt);
519fbd87 753 if (!err)
1da177e4 754 goto out2;
1da177e4 755 }
1da177e4 756
519fbd87
YH
757 if (--attempts <= 0)
758 goto out2;
759
760 /*
c71099ac 761 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
762 * released someone could insert this route. Relookup.
763 */
d8d1f30b 764 dst_release(&rt->dst);
519fbd87
YH
765 goto relookup;
766
767out:
8238dd06
YH
768 if (reachable) {
769 reachable = 0;
770 goto restart_2;
771 }
d8d1f30b 772 dst_hold(&rt->dst);
c71099ac 773 read_unlock_bh(&table->tb6_lock);
1da177e4 774out2:
d8d1f30b
CG
775 rt->dst.lastuse = jiffies;
776 rt->dst.__use++;
c71099ac
TG
777
778 return rt;
1da177e4
LT
779}
780
8ed67789 781static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4acad72d
PE
782 struct flowi *fl, int flags)
783{
8ed67789 784 return ip6_pol_route(net, table, fl->iif, fl, flags);
4acad72d
PE
785}
786
c71099ac
TG
787void ip6_route_input(struct sk_buff *skb)
788{
0660e03f 789 struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 790 struct net *net = dev_net(skb->dev);
adaa70bb 791 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
792 struct flowi fl = {
793 .iif = skb->dev->ifindex,
5811662b
CG
794 .fl6_dst = iph->daddr,
795 .fl6_src = iph->saddr,
796 .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
1ab1457c 797 .mark = skb->mark,
c71099ac
TG
798 .proto = iph->nexthdr,
799 };
adaa70bb 800
1d6e55f1 801 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 802 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 803
adf30907 804 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
c71099ac
TG
805}
806
8ed67789 807static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
c71099ac 808 struct flowi *fl, int flags)
1da177e4 809{
8ed67789 810 return ip6_pol_route(net, table, fl->oif, fl, flags);
c71099ac
TG
811}
812
4591db4f
DL
813struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
814 struct flowi *fl)
c71099ac
TG
815{
816 int flags = 0;
817
6057fd78 818 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
77d16f45 819 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 820
adaa70bb
TG
821 if (!ipv6_addr_any(&fl->fl6_src))
822 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
823 else if (sk)
824 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 825
4591db4f 826 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
1da177e4
LT
827}
828
7159039a 829EXPORT_SYMBOL(ip6_route_output);
1da177e4 830
14e50e57
DM
831int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
832{
833 struct rt6_info *ort = (struct rt6_info *) *dstp;
834 struct rt6_info *rt = (struct rt6_info *)
835 dst_alloc(&ip6_dst_blackhole_ops);
836 struct dst_entry *new = NULL;
837
838 if (rt) {
d8d1f30b 839 new = &rt->dst;
14e50e57
DM
840
841 atomic_set(&new->__refcnt, 1);
842 new->__use = 1;
352e512c
HX
843 new->input = dst_discard;
844 new->output = dst_discard;
14e50e57 845
defb3519 846 dst_copy_metrics(new, &ort->dst);
d8d1f30b 847 new->dev = ort->dst.dev;
14e50e57
DM
848 if (new->dev)
849 dev_hold(new->dev);
850 rt->rt6i_idev = ort->rt6i_idev;
851 if (rt->rt6i_idev)
852 in6_dev_hold(rt->rt6i_idev);
853 rt->rt6i_expires = 0;
854
855 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
856 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
857 rt->rt6i_metric = 0;
858
859 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
860#ifdef CONFIG_IPV6_SUBTREES
861 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
862#endif
863
864 dst_free(new);
865 }
866
867 dst_release(*dstp);
868 *dstp = new;
a02cec21 869 return new ? 0 : -ENOMEM;
14e50e57
DM
870}
871EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
872
1da177e4
LT
873/*
874 * Destination cache support functions
875 */
876
877static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
878{
879 struct rt6_info *rt;
880
881 rt = (struct rt6_info *) dst;
882
10414444 883 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1da177e4
LT
884 return dst;
885
886 return NULL;
887}
888
889static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
890{
891 struct rt6_info *rt = (struct rt6_info *) dst;
892
893 if (rt) {
54c1a859
YH
894 if (rt->rt6i_flags & RTF_CACHE) {
895 if (rt6_check_expired(rt)) {
896 ip6_del_rt(rt);
897 dst = NULL;
898 }
899 } else {
1da177e4 900 dst_release(dst);
54c1a859
YH
901 dst = NULL;
902 }
1da177e4 903 }
54c1a859 904 return dst;
1da177e4
LT
905}
906
907static void ip6_link_failure(struct sk_buff *skb)
908{
909 struct rt6_info *rt;
910
3ffe533c 911 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 912
adf30907 913 rt = (struct rt6_info *) skb_dst(skb);
1da177e4
LT
914 if (rt) {
915 if (rt->rt6i_flags&RTF_CACHE) {
d8d1f30b 916 dst_set_expires(&rt->dst, 0);
1da177e4
LT
917 rt->rt6i_flags |= RTF_EXPIRES;
918 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
919 rt->rt6i_node->fn_sernum = -1;
920 }
921}
922
923static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
924{
925 struct rt6_info *rt6 = (struct rt6_info*)dst;
926
927 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
928 rt6->rt6i_flags |= RTF_MODIFIED;
929 if (mtu < IPV6_MIN_MTU) {
defb3519 930 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 931 mtu = IPV6_MIN_MTU;
defb3519
DM
932 features |= RTAX_FEATURE_ALLFRAG;
933 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 934 }
defb3519 935 dst_metric_set(dst, RTAX_MTU, mtu);
8d71740c 936 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
937 }
938}
939
1da177e4
LT
940static int ipv6_get_mtu(struct net_device *dev);
941
0dbaee3b 942static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 943{
0dbaee3b
DM
944 struct net_device *dev = dst->dev;
945 unsigned int mtu = dst_mtu(dst);
946 struct net *net = dev_net(dev);
947
1da177e4
LT
948 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
949
5578689a
DL
950 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
951 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
952
953 /*
1ab1457c
YH
954 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
955 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
956 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
957 * rely only on pmtu discovery"
958 */
959 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
960 mtu = IPV6_MAXPLEN;
961 return mtu;
962}
963
3b00944c
YH
/*
 * Singly linked list (through dst->next) of the entries created by
 * icmp6_dst_alloc(), and the spinlock that guards every access to it.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 966
3b00944c 967struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 968 struct neighbour *neigh,
9acd9f3a 969 const struct in6_addr *addr)
1da177e4
LT
970{
971 struct rt6_info *rt;
972 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 973 struct net *net = dev_net(dev);
1da177e4
LT
974
975 if (unlikely(idev == NULL))
976 return NULL;
977
86393e52 978 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
979 if (unlikely(rt == NULL)) {
980 in6_dev_put(idev);
981 goto out;
982 }
983
984 dev_hold(dev);
985 if (neigh)
986 neigh_hold(neigh);
14deae41 987 else {
1da177e4 988 neigh = ndisc_get_neigh(dev, addr);
14deae41
DM
989 if (IS_ERR(neigh))
990 neigh = NULL;
991 }
1da177e4
LT
992
993 rt->rt6i_dev = dev;
994 rt->rt6i_idev = idev;
995 rt->rt6i_nexthop = neigh;
d8d1f30b 996 atomic_set(&rt->dst.__refcnt, 1);
defb3519
DM
997 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
998 dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev));
d8d1f30b 999 rt->dst.output = ip6_output;
1da177e4
LT
1000
1001#if 0 /* there's no chance to use these for ndisc */
d8d1f30b 1002 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1ab1457c 1003 ? DST_HOST
1da177e4
LT
1004 : 0;
1005 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1006 rt->rt6i_dst.plen = 128;
1007#endif
1008
3b00944c 1009 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1010 rt->dst.next = icmp6_dst_gc_list;
1011 icmp6_dst_gc_list = &rt->dst;
3b00944c 1012 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1013
5578689a 1014 fib6_force_start_gc(net);
1da177e4
LT
1015
1016out:
d8d1f30b 1017 return &rt->dst;
1da177e4
LT
1018}
1019
3d0f24a7 1020int icmp6_dst_gc(void)
1da177e4
LT
1021{
1022 struct dst_entry *dst, *next, **pprev;
3d0f24a7 1023 int more = 0;
1da177e4
LT
1024
1025 next = NULL;
5d0bbeeb 1026
3b00944c
YH
1027 spin_lock_bh(&icmp6_dst_lock);
1028 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1029
1da177e4
LT
1030 while ((dst = *pprev) != NULL) {
1031 if (!atomic_read(&dst->__refcnt)) {
1032 *pprev = dst->next;
1033 dst_free(dst);
1da177e4
LT
1034 } else {
1035 pprev = &dst->next;
3d0f24a7 1036 ++more;
1da177e4
LT
1037 }
1038 }
1039
3b00944c 1040 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1041
3d0f24a7 1042 return more;
1da177e4
LT
1043}
1044
1e493d19
DM
1045static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1046 void *arg)
1047{
1048 struct dst_entry *dst, **pprev;
1049
1050 spin_lock_bh(&icmp6_dst_lock);
1051 pprev = &icmp6_dst_gc_list;
1052 while ((dst = *pprev) != NULL) {
1053 struct rt6_info *rt = (struct rt6_info *) dst;
1054 if (func(rt, arg)) {
1055 *pprev = dst->next;
1056 dst_free(dst);
1057 } else {
1058 pprev = &dst->next;
1059 }
1060 }
1061 spin_unlock_bh(&icmp6_dst_lock);
1062}
1063
569d3645 1064static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1065{
1da177e4 1066 unsigned long now = jiffies;
86393e52 1067 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1068 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1069 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1070 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1071 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1072 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1073 int entries;
7019b78e 1074
fc66f95c 1075 entries = dst_entries_get_fast(ops);
7019b78e 1076 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1077 entries <= rt_max_size)
1da177e4
LT
1078 goto out;
1079
6891a346
BT
1080 net->ipv6.ip6_rt_gc_expire++;
1081 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1082 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1083 entries = dst_entries_get_slow(ops);
1084 if (entries < ops->gc_thresh)
7019b78e 1085 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1086out:
7019b78e 1087 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1088 return entries > rt_max_size;
1da177e4
LT
1089}
1090
1091/* Clean host part of a prefix. Not necessary in radix tree,
1092 but results in cleaner routing tables.
1093
1094 Remove it only when all the things will work!
1095 */
1096
1097static int ipv6_get_mtu(struct net_device *dev)
1098{
1099 int mtu = IPV6_MIN_MTU;
1100 struct inet6_dev *idev;
1101
c68f24cc
ED
1102 rcu_read_lock();
1103 idev = __in6_dev_get(dev);
1104 if (idev)
1da177e4 1105 mtu = idev->cnf.mtu6;
c68f24cc 1106 rcu_read_unlock();
1da177e4
LT
1107 return mtu;
1108}
1109
6b75d090 1110int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1111{
5170ae82 1112 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1113 if (hoplimit == 0) {
6b75d090 1114 struct net_device *dev = dst->dev;
c68f24cc
ED
1115 struct inet6_dev *idev;
1116
1117 rcu_read_lock();
1118 idev = __in6_dev_get(dev);
1119 if (idev)
6b75d090 1120 hoplimit = idev->cnf.hop_limit;
c68f24cc 1121 else
53b7997f 1122 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1123 rcu_read_unlock();
1da177e4
LT
1124 }
1125 return hoplimit;
1126}
abbf46ae 1127EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1128
1129/*
1130 *
1131 */
1132
86872cb5 1133int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1134{
1135 int err;
5578689a 1136 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1137 struct rt6_info *rt = NULL;
1138 struct net_device *dev = NULL;
1139 struct inet6_dev *idev = NULL;
c71099ac 1140 struct fib6_table *table;
1da177e4
LT
1141 int addr_type;
1142
86872cb5 1143 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1144 return -EINVAL;
1145#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1146 if (cfg->fc_src_len)
1da177e4
LT
1147 return -EINVAL;
1148#endif
86872cb5 1149 if (cfg->fc_ifindex) {
1da177e4 1150 err = -ENODEV;
5578689a 1151 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1152 if (!dev)
1153 goto out;
1154 idev = in6_dev_get(dev);
1155 if (!idev)
1156 goto out;
1157 }
1158
86872cb5
TG
1159 if (cfg->fc_metric == 0)
1160 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1161
5578689a 1162 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1163 if (table == NULL) {
1164 err = -ENOBUFS;
1165 goto out;
1166 }
1167
86393e52 1168 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1169
1170 if (rt == NULL) {
1171 err = -ENOMEM;
1172 goto out;
1173 }
1174
d8d1f30b 1175 rt->dst.obsolete = -1;
6f704992
YH
1176 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1177 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1178 0;
1da177e4 1179
86872cb5
TG
1180 if (cfg->fc_protocol == RTPROT_UNSPEC)
1181 cfg->fc_protocol = RTPROT_BOOT;
1182 rt->rt6i_protocol = cfg->fc_protocol;
1183
1184 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1185
1186 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1187 rt->dst.input = ip6_mc_input;
ab79ad14
1188 else if (cfg->fc_flags & RTF_LOCAL)
1189 rt->dst.input = ip6_input;
1da177e4 1190 else
d8d1f30b 1191 rt->dst.input = ip6_forward;
1da177e4 1192
d8d1f30b 1193 rt->dst.output = ip6_output;
1da177e4 1194
86872cb5
TG
1195 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1196 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1197 if (rt->rt6i_dst.plen == 128)
d8d1f30b 1198 rt->dst.flags = DST_HOST;
1da177e4
LT
1199
1200#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1201 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1202 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1203#endif
1204
86872cb5 1205 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1206
1207 /* We cannot add true routes via loopback here,
1208 they would result in kernel looping; promote them to reject routes
1209 */
86872cb5 1210 if ((cfg->fc_flags & RTF_REJECT) ||
ab79ad14
1211 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1212 && !(cfg->fc_flags&RTF_LOCAL))) {
1da177e4 1213 /* hold loopback dev/idev if we haven't done so. */
5578689a 1214 if (dev != net->loopback_dev) {
1da177e4
LT
1215 if (dev) {
1216 dev_put(dev);
1217 in6_dev_put(idev);
1218 }
5578689a 1219 dev = net->loopback_dev;
1da177e4
LT
1220 dev_hold(dev);
1221 idev = in6_dev_get(dev);
1222 if (!idev) {
1223 err = -ENODEV;
1224 goto out;
1225 }
1226 }
d8d1f30b
CG
1227 rt->dst.output = ip6_pkt_discard_out;
1228 rt->dst.input = ip6_pkt_discard;
1229 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1230 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1231 goto install_route;
1232 }
1233
86872cb5 1234 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1235 struct in6_addr *gw_addr;
1236 int gwa_type;
1237
86872cb5
TG
1238 gw_addr = &cfg->fc_gateway;
1239 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1240 gwa_type = ipv6_addr_type(gw_addr);
1241
1242 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1243 struct rt6_info *grt;
1244
1245 /* IPv6 strictly inhibits using not link-local
1246 addresses as nexthop address.
1247 Otherwise, router will not able to send redirects.
1248 It is very good, but in some (rare!) circumstances
1249 (SIT, PtP, NBMA NOARP links) it is handy to allow
1250 some exceptions. --ANK
1251 */
1252 err = -EINVAL;
1253 if (!(gwa_type&IPV6_ADDR_UNICAST))
1254 goto out;
1255
5578689a 1256 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1257
1258 err = -EHOSTUNREACH;
1259 if (grt == NULL)
1260 goto out;
1261 if (dev) {
1262 if (dev != grt->rt6i_dev) {
d8d1f30b 1263 dst_release(&grt->dst);
1da177e4
LT
1264 goto out;
1265 }
1266 } else {
1267 dev = grt->rt6i_dev;
1268 idev = grt->rt6i_idev;
1269 dev_hold(dev);
1270 in6_dev_hold(grt->rt6i_idev);
1271 }
1272 if (!(grt->rt6i_flags&RTF_GATEWAY))
1273 err = 0;
d8d1f30b 1274 dst_release(&grt->dst);
1da177e4
LT
1275
1276 if (err)
1277 goto out;
1278 }
1279 err = -EINVAL;
1280 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1281 goto out;
1282 }
1283
1284 err = -ENODEV;
1285 if (dev == NULL)
1286 goto out;
1287
86872cb5 1288 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1289 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1290 if (IS_ERR(rt->rt6i_nexthop)) {
1291 err = PTR_ERR(rt->rt6i_nexthop);
1292 rt->rt6i_nexthop = NULL;
1293 goto out;
1294 }
1295 }
1296
86872cb5 1297 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1298
1299install_route:
86872cb5
TG
1300 if (cfg->fc_mx) {
1301 struct nlattr *nla;
1302 int remaining;
1303
1304 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1305 int type = nla_type(nla);
86872cb5
TG
1306
1307 if (type) {
1308 if (type > RTAX_MAX) {
1da177e4
LT
1309 err = -EINVAL;
1310 goto out;
1311 }
86872cb5 1312
defb3519 1313 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1314 }
1da177e4
LT
1315 }
1316 }
1317
d8d1f30b 1318 if (!dst_mtu(&rt->dst))
defb3519 1319 dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(dev));
d8d1f30b 1320 rt->dst.dev = dev;
1da177e4 1321 rt->rt6i_idev = idev;
c71099ac 1322 rt->rt6i_table = table;
63152fc0 1323
c346dca1 1324 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1325
86872cb5 1326 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1327
1328out:
1329 if (dev)
1330 dev_put(dev);
1331 if (idev)
1332 in6_dev_put(idev);
1333 if (rt)
d8d1f30b 1334 dst_free(&rt->dst);
1da177e4
LT
1335 return err;
1336}
1337
86872cb5 1338static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1339{
1340 int err;
c71099ac 1341 struct fib6_table *table;
c346dca1 1342 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1343
8ed67789 1344 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1345 return -ENOENT;
1346
c71099ac
TG
1347 table = rt->rt6i_table;
1348 write_lock_bh(&table->tb6_lock);
1da177e4 1349
86872cb5 1350 err = fib6_del(rt, info);
d8d1f30b 1351 dst_release(&rt->dst);
1da177e4 1352
c71099ac 1353 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1354
1355 return err;
1356}
1357
e0a1ad73
TG
1358int ip6_del_rt(struct rt6_info *rt)
1359{
4d1169c1 1360 struct nl_info info = {
c346dca1 1361 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1362 };
528c4ceb 1363 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1364}
1365
86872cb5 1366static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1367{
c71099ac 1368 struct fib6_table *table;
1da177e4
LT
1369 struct fib6_node *fn;
1370 struct rt6_info *rt;
1371 int err = -ESRCH;
1372
5578689a 1373 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1374 if (table == NULL)
1375 return err;
1376
1377 read_lock_bh(&table->tb6_lock);
1da177e4 1378
c71099ac 1379 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1380 &cfg->fc_dst, cfg->fc_dst_len,
1381 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1382
1da177e4 1383 if (fn) {
d8d1f30b 1384 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1385 if (cfg->fc_ifindex &&
1da177e4 1386 (rt->rt6i_dev == NULL ||
86872cb5 1387 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1388 continue;
86872cb5
TG
1389 if (cfg->fc_flags & RTF_GATEWAY &&
1390 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1391 continue;
86872cb5 1392 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1393 continue;
d8d1f30b 1394 dst_hold(&rt->dst);
c71099ac 1395 read_unlock_bh(&table->tb6_lock);
1da177e4 1396
86872cb5 1397 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1398 }
1399 }
c71099ac 1400 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1401
1402 return err;
1403}
1404
1405/*
1406 * Handle redirects
1407 */
a6279458
YH
1408struct ip6rd_flowi {
1409 struct flowi fl;
1410 struct in6_addr gateway;
1411};
1412
8ed67789
DL
1413static struct rt6_info *__ip6_route_redirect(struct net *net,
1414 struct fib6_table *table,
a6279458
YH
1415 struct flowi *fl,
1416 int flags)
1da177e4 1417{
a6279458
YH
1418 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1419 struct rt6_info *rt;
e843b9e1 1420 struct fib6_node *fn;
c71099ac 1421
1da177e4 1422 /*
e843b9e1
YH
1423 * Get the "current" route for this destination and
1424 * check if the redirect has come from approriate router.
1425 *
1426 * RFC 2461 specifies that redirects should only be
1427 * accepted if they come from the nexthop to the target.
1428 * Due to the way the routes are chosen, this notion
1429 * is a bit fuzzy and one might need to check all possible
1430 * routes.
1da177e4 1431 */
1da177e4 1432
c71099ac 1433 read_lock_bh(&table->tb6_lock);
a6279458 1434 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1435restart:
d8d1f30b 1436 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1437 /*
1438 * Current route is on-link; redirect is always invalid.
1439 *
1440 * Seems, previous statement is not true. It could
1441 * be node, which looks for us as on-link (f.e. proxy ndisc)
1442 * But then router serving it might decide, that we should
1443 * know truth 8)8) --ANK (980726).
1444 */
1445 if (rt6_check_expired(rt))
1446 continue;
1447 if (!(rt->rt6i_flags & RTF_GATEWAY))
1448 continue;
a6279458 1449 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1450 continue;
a6279458 1451 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1452 continue;
1453 break;
1454 }
a6279458 1455
cb15d9c2 1456 if (!rt)
8ed67789
DL
1457 rt = net->ipv6.ip6_null_entry;
1458 BACKTRACK(net, &fl->fl6_src);
cb15d9c2 1459out:
d8d1f30b 1460 dst_hold(&rt->dst);
a6279458 1461
c71099ac 1462 read_unlock_bh(&table->tb6_lock);
e843b9e1 1463
a6279458
YH
1464 return rt;
1465};
1466
1467static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1468 struct in6_addr *src,
1469 struct in6_addr *gateway,
1470 struct net_device *dev)
1471{
adaa70bb 1472 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1473 struct net *net = dev_net(dev);
a6279458
YH
1474 struct ip6rd_flowi rdfl = {
1475 .fl = {
1476 .oif = dev->ifindex,
5811662b
CG
1477 .fl6_dst = *dest,
1478 .fl6_src = *src,
a6279458 1479 },
a6279458 1480 };
adaa70bb 1481
86c36ce4
BH
1482 ipv6_addr_copy(&rdfl.gateway, gateway);
1483
adaa70bb
TG
1484 if (rt6_need_strict(dest))
1485 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1486
5578689a 1487 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
58f09b78 1488 flags, __ip6_route_redirect);
a6279458
YH
1489}
1490
1491void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1492 struct in6_addr *saddr,
1493 struct neighbour *neigh, u8 *lladdr, int on_link)
1494{
1495 struct rt6_info *rt, *nrt = NULL;
1496 struct netevent_redirect netevent;
c346dca1 1497 struct net *net = dev_net(neigh->dev);
a6279458
YH
1498
1499 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1500
8ed67789 1501 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1502 if (net_ratelimit())
1503 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1504 "for redirect target\n");
a6279458 1505 goto out;
1da177e4
LT
1506 }
1507
1da177e4
LT
1508 /*
1509 * We have finally decided to accept it.
1510 */
1511
1ab1457c 1512 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1513 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1514 NEIGH_UPDATE_F_OVERRIDE|
1515 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1516 NEIGH_UPDATE_F_ISROUTER))
1517 );
1518
1519 /*
1520 * Redirect received -> path was valid.
1521 * Look, redirects are sent only in response to data packets,
1522 * so that this nexthop apparently is reachable. --ANK
1523 */
d8d1f30b 1524 dst_confirm(&rt->dst);
1da177e4
LT
1525
1526 /* Duplicate redirect: silently ignore. */
d8d1f30b 1527 if (neigh == rt->dst.neighbour)
1da177e4
LT
1528 goto out;
1529
1530 nrt = ip6_rt_copy(rt);
1531 if (nrt == NULL)
1532 goto out;
1533
1534 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1535 if (on_link)
1536 nrt->rt6i_flags &= ~RTF_GATEWAY;
1537
1538 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1539 nrt->rt6i_dst.plen = 128;
d8d1f30b 1540 nrt->dst.flags |= DST_HOST;
1da177e4
LT
1541
1542 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1543 nrt->rt6i_nexthop = neigh_clone(neigh);
1544 /* Reset pmtu, it may be better */
defb3519 1545 dst_metric_set(&nrt->dst, RTAX_MTU, ipv6_get_mtu(neigh->dev));
1da177e4 1546
40e22e8f 1547 if (ip6_ins_rt(nrt))
1da177e4
LT
1548 goto out;
1549
d8d1f30b
CG
1550 netevent.old = &rt->dst;
1551 netevent.new = &nrt->dst;
8d71740c
TT
1552 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1553
1da177e4 1554 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1555 ip6_del_rt(rt);
1da177e4
LT
1556 return;
1557 }
1558
1559out:
d8d1f30b 1560 dst_release(&rt->dst);
1da177e4
LT
1561}
1562
1563/*
1564 * Handle ICMP "packet too big" messages
1565 * i.e. Path MTU discovery
1566 */
1567
ae878ae2
1568static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1569 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1570{
1571 struct rt6_info *rt, *nrt;
1572 int allfrag = 0;
1573
ae878ae2 1574 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1da177e4
LT
1575 if (rt == NULL)
1576 return;
1577
d8d1f30b 1578 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1579 goto out;
1580
1581 if (pmtu < IPV6_MIN_MTU) {
1582 /*
1ab1457c 1583 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1584 * MTU (1280) and a fragment header should always be included
1585 * after a node receiving Too Big message reporting PMTU is
1586 * less than the IPv6 Minimum Link MTU.
1587 */
1588 pmtu = IPV6_MIN_MTU;
1589 allfrag = 1;
1590 }
1591
1592 /* New mtu received -> path was valid.
1593 They are sent only in response to data packets,
1594 so that this nexthop apparently is reachable. --ANK
1595 */
d8d1f30b 1596 dst_confirm(&rt->dst);
1da177e4
LT
1597
1598 /* Host route. If it is static, it would be better
1599 not to override it, but add new one, so that
1600 when cache entry will expire old pmtu
1601 would return automatically.
1602 */
1603 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1604 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1605 if (allfrag) {
1606 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1607 features |= RTAX_FEATURE_ALLFRAG;
1608 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1609 }
d8d1f30b 1610 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1611 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1612 goto out;
1613 }
1614
1615 /* Network route.
1616 Two cases are possible:
1617 1. It is connected route. Action: COW
1618 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1619 */
d5315b50 1620 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1621 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1622 else
1623 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1624
d5315b50 1625 if (nrt) {
defb3519
DM
1626 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1627 if (allfrag) {
1628 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1629 features |= RTAX_FEATURE_ALLFRAG;
1630 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1631 }
a1e78363
YH
1632
1633 /* According to RFC 1981, detecting PMTU increase shouldn't be
1634 * happened within 5 mins, the recommended timer is 10 mins.
1635 * Here this route expiration time is set to ip6_rt_mtu_expires
1636 * which is 10 mins. After 10 mins the decreased pmtu is expired
1637 * and detecting PMTU increase will be automatically happened.
1638 */
d8d1f30b 1639 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1640 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1641
40e22e8f 1642 ip6_ins_rt(nrt);
1da177e4 1643 }
1da177e4 1644out:
d8d1f30b 1645 dst_release(&rt->dst);
1da177e4
LT
1646}
1647
ae878ae2
1648void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1649 struct net_device *dev, u32 pmtu)
1650{
1651 struct net *net = dev_net(dev);
1652
1653 /*
1654 * RFC 1981 states that a node "MUST reduce the size of the packets it
1655 * is sending along the path" that caused the Packet Too Big message.
1656 * Since it's not possible in the general case to determine which
1657 * interface was used to send the original packet, we update the MTU
1658 * on the interface that will be used to send future packets. We also
1659 * update the MTU on the interface that received the Packet Too Big in
1660 * case the original packet was forced out that interface with
1661 * SO_BINDTODEVICE or similar. This is the next best thing to the
1662 * correct behaviour, which would be to update the MTU on all
1663 * interfaces.
1664 */
1665 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1666 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1667}
1668
1da177e4
LT
1669/*
1670 * Misc support functions
1671 */
1672
1673static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1674{
c346dca1 1675 struct net *net = dev_net(ort->rt6i_dev);
86393e52 1676 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1677
1678 if (rt) {
d8d1f30b
CG
1679 rt->dst.input = ort->dst.input;
1680 rt->dst.output = ort->dst.output;
1681
defb3519 1682 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b
CG
1683 rt->dst.error = ort->dst.error;
1684 rt->dst.dev = ort->dst.dev;
1685 if (rt->dst.dev)
1686 dev_hold(rt->dst.dev);
1da177e4
LT
1687 rt->rt6i_idev = ort->rt6i_idev;
1688 if (rt->rt6i_idev)
1689 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1690 rt->dst.lastuse = jiffies;
1da177e4
LT
1691 rt->rt6i_expires = 0;
1692
1693 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1694 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1695 rt->rt6i_metric = 0;
1696
1697 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1698#ifdef CONFIG_IPV6_SUBTREES
1699 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1700#endif
c71099ac 1701 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1702 }
1703 return rt;
1704}
1705
70ceb4f5 1706#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
1707static struct rt6_info *rt6_get_route_info(struct net *net,
1708 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1709 struct in6_addr *gwaddr, int ifindex)
1710{
1711 struct fib6_node *fn;
1712 struct rt6_info *rt = NULL;
c71099ac
TG
1713 struct fib6_table *table;
1714
efa2cea0 1715 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1716 if (table == NULL)
1717 return NULL;
70ceb4f5 1718
c71099ac
TG
1719 write_lock_bh(&table->tb6_lock);
1720 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1721 if (!fn)
1722 goto out;
1723
d8d1f30b 1724 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
70ceb4f5
YH
1725 if (rt->rt6i_dev->ifindex != ifindex)
1726 continue;
1727 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1728 continue;
1729 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1730 continue;
d8d1f30b 1731 dst_hold(&rt->dst);
70ceb4f5
YH
1732 break;
1733 }
1734out:
c71099ac 1735 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1736 return rt;
1737}
1738
efa2cea0
DL
1739static struct rt6_info *rt6_add_route_info(struct net *net,
1740 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1741 struct in6_addr *gwaddr, int ifindex,
1742 unsigned pref)
1743{
86872cb5
TG
1744 struct fib6_config cfg = {
1745 .fc_table = RT6_TABLE_INFO,
238fc7ea 1746 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1747 .fc_ifindex = ifindex,
1748 .fc_dst_len = prefixlen,
1749 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1750 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1751 .fc_nlinfo.pid = 0,
1752 .fc_nlinfo.nlh = NULL,
1753 .fc_nlinfo.nl_net = net,
86872cb5
TG
1754 };
1755
1756 ipv6_addr_copy(&cfg.fc_dst, prefix);
1757 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1758
e317da96
YH
1759 /* We should treat it as a default route if prefix length is 0. */
1760 if (!prefixlen)
86872cb5 1761 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1762
86872cb5 1763 ip6_route_add(&cfg);
70ceb4f5 1764
efa2cea0 1765 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1766}
1767#endif
1768
1da177e4 1769struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1770{
1da177e4 1771 struct rt6_info *rt;
c71099ac 1772 struct fib6_table *table;
1da177e4 1773
c346dca1 1774 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
c71099ac
TG
1775 if (table == NULL)
1776 return NULL;
1da177e4 1777
c71099ac 1778 write_lock_bh(&table->tb6_lock);
d8d1f30b 1779 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1da177e4 1780 if (dev == rt->rt6i_dev &&
045927ff 1781 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1782 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1783 break;
1784 }
1785 if (rt)
d8d1f30b 1786 dst_hold(&rt->dst);
c71099ac 1787 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1788 return rt;
1789}
1790
1791struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1792 struct net_device *dev,
1793 unsigned int pref)
1da177e4 1794{
86872cb5
TG
1795 struct fib6_config cfg = {
1796 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1797 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1798 .fc_ifindex = dev->ifindex,
1799 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1800 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1801 .fc_nlinfo.pid = 0,
1802 .fc_nlinfo.nlh = NULL,
c346dca1 1803 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1804 };
1da177e4 1805
86872cb5 1806 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1807
86872cb5 1808 ip6_route_add(&cfg);
1da177e4 1809
1da177e4
LT
1810 return rt6_get_dflt_router(gwaddr, dev);
1811}
1812
7b4da532 1813void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1814{
1815 struct rt6_info *rt;
c71099ac
TG
1816 struct fib6_table *table;
1817
1818 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1819 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1820 if (table == NULL)
1821 return;
1da177e4
LT
1822
1823restart:
c71099ac 1824 read_lock_bh(&table->tb6_lock);
d8d1f30b 1825 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1826 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1827 dst_hold(&rt->dst);
c71099ac 1828 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1829 ip6_del_rt(rt);
1da177e4
LT
1830 goto restart;
1831 }
1832 }
c71099ac 1833 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1834}
1835
5578689a
DL
1836static void rtmsg_to_fib6_config(struct net *net,
1837 struct in6_rtmsg *rtmsg,
86872cb5
TG
1838 struct fib6_config *cfg)
1839{
1840 memset(cfg, 0, sizeof(*cfg));
1841
1842 cfg->fc_table = RT6_TABLE_MAIN;
1843 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1844 cfg->fc_metric = rtmsg->rtmsg_metric;
1845 cfg->fc_expires = rtmsg->rtmsg_info;
1846 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1847 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1848 cfg->fc_flags = rtmsg->rtmsg_flags;
1849
5578689a 1850 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1851
86872cb5
TG
1852 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1853 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1854 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1855}
1856
5578689a 1857int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1858{
86872cb5 1859 struct fib6_config cfg;
1da177e4
LT
1860 struct in6_rtmsg rtmsg;
1861 int err;
1862
1863 switch(cmd) {
1864 case SIOCADDRT: /* Add a route */
1865 case SIOCDELRT: /* Delete a route */
1866 if (!capable(CAP_NET_ADMIN))
1867 return -EPERM;
1868 err = copy_from_user(&rtmsg, arg,
1869 sizeof(struct in6_rtmsg));
1870 if (err)
1871 return -EFAULT;
86872cb5 1872
5578689a 1873 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1874
1da177e4
LT
1875 rtnl_lock();
1876 switch (cmd) {
1877 case SIOCADDRT:
86872cb5 1878 err = ip6_route_add(&cfg);
1da177e4
LT
1879 break;
1880 case SIOCDELRT:
86872cb5 1881 err = ip6_route_del(&cfg);
1da177e4
LT
1882 break;
1883 default:
1884 err = -EINVAL;
1885 }
1886 rtnl_unlock();
1887
1888 return err;
3ff50b79 1889 }
1da177e4
LT
1890
1891 return -EINVAL;
1892}
1893
1894/*
1895 * Drop the packet on the floor
1896 */
1897
d5fdd6ba 1898static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1899{
612f09e8 1900 int type;
adf30907 1901 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
1902 switch (ipstats_mib_noroutes) {
1903 case IPSTATS_MIB_INNOROUTES:
0660e03f 1904 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 1905 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
1906 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1907 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
1908 break;
1909 }
1910 /* FALLTHROUGH */
1911 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
1912 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1913 ipstats_mib_noroutes);
612f09e8
YH
1914 break;
1915 }
3ffe533c 1916 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
1917 kfree_skb(skb);
1918 return 0;
1919}
1920
9ce8ade0
TG
1921static int ip6_pkt_discard(struct sk_buff *skb)
1922{
612f09e8 1923 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1924}
1925
20380731 1926static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 1927{
adf30907 1928 skb->dev = skb_dst(skb)->dev;
612f09e8 1929 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1930}
1931
6723ab54
DM
1932#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1933
9ce8ade0
TG
1934static int ip6_pkt_prohibit(struct sk_buff *skb)
1935{
612f09e8 1936 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1937}
1938
1939static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1940{
adf30907 1941 skb->dev = skb_dst(skb)->dev;
612f09e8 1942 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1943}
1944
6723ab54
DM
1945#endif
1946
1da177e4
LT
1947/*
1948 * Allocate a dst for local (unicast / anycast) address.
1949 */
1950
1951struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1952 const struct in6_addr *addr,
1953 int anycast)
1954{
c346dca1 1955 struct net *net = dev_net(idev->dev);
86393e52 1956 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
14deae41 1957 struct neighbour *neigh;
1da177e4 1958
40385653
BG
1959 if (rt == NULL) {
1960 if (net_ratelimit())
1961 pr_warning("IPv6: Maximum number of routes reached,"
1962 " consider increasing route/max_size.\n");
1da177e4 1963 return ERR_PTR(-ENOMEM);
40385653 1964 }
1da177e4 1965
5578689a 1966 dev_hold(net->loopback_dev);
1da177e4
LT
1967 in6_dev_hold(idev);
1968
d8d1f30b
CG
1969 rt->dst.flags = DST_HOST;
1970 rt->dst.input = ip6_input;
1971 rt->dst.output = ip6_output;
5578689a 1972 rt->rt6i_dev = net->loopback_dev;
1da177e4 1973 rt->rt6i_idev = idev;
defb3519 1974 dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev));
defb3519 1975 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
d8d1f30b 1976 rt->dst.obsolete = -1;
1da177e4
LT
1977
1978 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1979 if (anycast)
1980 rt->rt6i_flags |= RTF_ANYCAST;
1981 else
1da177e4 1982 rt->rt6i_flags |= RTF_LOCAL;
14deae41
DM
1983 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1984 if (IS_ERR(neigh)) {
d8d1f30b 1985 dst_free(&rt->dst);
14deae41
DM
1986
1987 /* We are casting this because that is the return
1988 * value type. But an errno encoded pointer is the
1989 * same regardless of the underlying pointer type,
1990 * and that's what we are returning. So this is OK.
1991 */
1992 return (struct rt6_info *) neigh;
1da177e4 1993 }
14deae41 1994 rt->rt6i_nexthop = neigh;
1da177e4
LT
1995
1996 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1997 rt->rt6i_dst.plen = 128;
5578689a 1998 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 1999
d8d1f30b 2000 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2001
2002 return rt;
2003}
2004
8ed67789
DL
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches any device */
	struct net *net;	/* namespace whose null entry must be kept */
};
2009
1da177e4
LT
2010static int fib6_ifdown(struct rt6_info *rt, void *arg)
2011{
8ed67789
DL
2012 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
2013 struct net *net = ((struct arg_dev_net *)arg)->net;
2014
2015 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2016 rt != net->ipv6.ip6_null_entry) {
1da177e4
LT
2017 RT6_TRACE("deleted by ifdown %p\n", rt);
2018 return -1;
2019 }
2020 return 0;
2021}
2022
f3db4851 2023void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2024{
8ed67789
DL
2025 struct arg_dev_net adn = {
2026 .dev = dev,
2027 .net = net,
2028 };
2029
2030 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2031 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2032}
2033
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU */
};
2039
2040static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2041{
2042 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2043 struct inet6_dev *idev;
2044
2045 /* In IPv6 pmtu discovery is not optional,
2046 so that RTAX_MTU lock cannot disable it.
2047 We still use this lock to block changes
2048 caused by addrconf/ndisc.
2049 */
2050
2051 idev = __in6_dev_get(arg->dev);
2052 if (idev == NULL)
2053 return 0;
2054
2055 /* For administrative MTU increase, there is no way to discover
2056 IPv6 PMTU increase, so PMTU increase should be updated here.
2057 Since RFC 1981 doesn't include administrative MTU increase
2058 update PMTU increase is a MUST. (i.e. jumbo frame)
2059 */
2060 /*
2061 If new MTU is less than route PMTU, this new MTU will be the
2062 lowest MTU in the path, update the route PMTU to reflect PMTU
2063 decreases; if new MTU is greater than route PMTU, and the
2064 old MTU is the lowest MTU in the path, update the route PMTU
2065 to reflect the increase. In this case if the other nodes' MTU
2066 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2067 PMTU discouvery.
2068 */
2069 if (rt->rt6i_dev == arg->dev &&
d8d1f30b
CG
2070 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2071 (dst_mtu(&rt->dst) >= arg->mtu ||
2072 (dst_mtu(&rt->dst) < arg->mtu &&
2073 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2074 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2075 }
1da177e4
LT
2076 return 0;
2077}
2078
2079void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2080{
c71099ac
TG
2081 struct rt6_mtu_change_arg arg = {
2082 .dev = dev,
2083 .mtu = mtu,
2084 };
1da177e4 2085
c346dca1 2086 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2087}
2088
ef7c79ed 2089static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2090 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2091 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2092 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2093 [RTA_PRIORITY] = { .type = NLA_U32 },
2094 [RTA_METRICS] = { .type = NLA_NESTED },
2095};
2096
2097static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2098 struct fib6_config *cfg)
1da177e4 2099{
86872cb5
TG
2100 struct rtmsg *rtm;
2101 struct nlattr *tb[RTA_MAX+1];
2102 int err;
1da177e4 2103
86872cb5
TG
2104 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2105 if (err < 0)
2106 goto errout;
1da177e4 2107
86872cb5
TG
2108 err = -EINVAL;
2109 rtm = nlmsg_data(nlh);
2110 memset(cfg, 0, sizeof(*cfg));
2111
2112 cfg->fc_table = rtm->rtm_table;
2113 cfg->fc_dst_len = rtm->rtm_dst_len;
2114 cfg->fc_src_len = rtm->rtm_src_len;
2115 cfg->fc_flags = RTF_UP;
2116 cfg->fc_protocol = rtm->rtm_protocol;
2117
2118 if (rtm->rtm_type == RTN_UNREACHABLE)
2119 cfg->fc_flags |= RTF_REJECT;
2120
ab79ad14
2121 if (rtm->rtm_type == RTN_LOCAL)
2122 cfg->fc_flags |= RTF_LOCAL;
2123
86872cb5
TG
2124 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2125 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2126 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2127
2128 if (tb[RTA_GATEWAY]) {
2129 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2130 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2131 }
86872cb5
TG
2132
2133 if (tb[RTA_DST]) {
2134 int plen = (rtm->rtm_dst_len + 7) >> 3;
2135
2136 if (nla_len(tb[RTA_DST]) < plen)
2137 goto errout;
2138
2139 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2140 }
86872cb5
TG
2141
2142 if (tb[RTA_SRC]) {
2143 int plen = (rtm->rtm_src_len + 7) >> 3;
2144
2145 if (nla_len(tb[RTA_SRC]) < plen)
2146 goto errout;
2147
2148 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2149 }
86872cb5
TG
2150
2151 if (tb[RTA_OIF])
2152 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2153
2154 if (tb[RTA_PRIORITY])
2155 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2156
2157 if (tb[RTA_METRICS]) {
2158 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2159 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2160 }
86872cb5
TG
2161
2162 if (tb[RTA_TABLE])
2163 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2164
2165 err = 0;
2166errout:
2167 return err;
1da177e4
LT
2168}
2169
c127ea2c 2170static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2171{
86872cb5
TG
2172 struct fib6_config cfg;
2173 int err;
1da177e4 2174
86872cb5
TG
2175 err = rtm_to_fib6_config(skb, nlh, &cfg);
2176 if (err < 0)
2177 return err;
2178
2179 return ip6_route_del(&cfg);
1da177e4
LT
2180}
2181
c127ea2c 2182static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2183{
86872cb5
TG
2184 struct fib6_config cfg;
2185 int err;
1da177e4 2186
86872cb5
TG
2187 err = rtm_to_fib6_config(skb, nlh, &cfg);
2188 if (err < 0)
2189 return err;
2190
2191 return ip6_route_add(&cfg);
1da177e4
LT
2192}
2193
339bf98f
TG
2194static inline size_t rt6_nlmsg_size(void)
2195{
2196 return NLMSG_ALIGN(sizeof(struct rtmsg))
2197 + nla_total_size(16) /* RTA_SRC */
2198 + nla_total_size(16) /* RTA_DST */
2199 + nla_total_size(16) /* RTA_GATEWAY */
2200 + nla_total_size(16) /* RTA_PREFSRC */
2201 + nla_total_size(4) /* RTA_TABLE */
2202 + nla_total_size(4) /* RTA_IIF */
2203 + nla_total_size(4) /* RTA_OIF */
2204 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2205 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2206 + nla_total_size(sizeof(struct rta_cacheinfo));
2207}
2208
191cd582
BH
2209static int rt6_fill_node(struct net *net,
2210 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2211 struct in6_addr *dst, struct in6_addr *src,
2212 int iif, int type, u32 pid, u32 seq,
7bc570c8 2213 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2214{
2215 struct rtmsg *rtm;
2d7202bf 2216 struct nlmsghdr *nlh;
e3703b3d 2217 long expires;
9e762a4a 2218 u32 table;
1da177e4
LT
2219
2220 if (prefix) { /* user wants prefix routes only */
2221 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2222 /* success since this is not a prefix route */
2223 return 1;
2224 }
2225 }
2226
2d7202bf
TG
2227 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2228 if (nlh == NULL)
26932566 2229 return -EMSGSIZE;
2d7202bf
TG
2230
2231 rtm = nlmsg_data(nlh);
1da177e4
LT
2232 rtm->rtm_family = AF_INET6;
2233 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2234 rtm->rtm_src_len = rt->rt6i_src.plen;
2235 rtm->rtm_tos = 0;
c71099ac 2236 if (rt->rt6i_table)
9e762a4a 2237 table = rt->rt6i_table->tb6_id;
c71099ac 2238 else
9e762a4a
PM
2239 table = RT6_TABLE_UNSPEC;
2240 rtm->rtm_table = table;
2d7202bf 2241 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2242 if (rt->rt6i_flags&RTF_REJECT)
2243 rtm->rtm_type = RTN_UNREACHABLE;
ab79ad14
2244 else if (rt->rt6i_flags&RTF_LOCAL)
2245 rtm->rtm_type = RTN_LOCAL;
1da177e4
LT
2246 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2247 rtm->rtm_type = RTN_LOCAL;
2248 else
2249 rtm->rtm_type = RTN_UNICAST;
2250 rtm->rtm_flags = 0;
2251 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2252 rtm->rtm_protocol = rt->rt6i_protocol;
2253 if (rt->rt6i_flags&RTF_DYNAMIC)
2254 rtm->rtm_protocol = RTPROT_REDIRECT;
2255 else if (rt->rt6i_flags & RTF_ADDRCONF)
2256 rtm->rtm_protocol = RTPROT_KERNEL;
2257 else if (rt->rt6i_flags&RTF_DEFAULT)
2258 rtm->rtm_protocol = RTPROT_RA;
2259
2260 if (rt->rt6i_flags&RTF_CACHE)
2261 rtm->rtm_flags |= RTM_F_CLONED;
2262
2263 if (dst) {
2d7202bf 2264 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2265 rtm->rtm_dst_len = 128;
1da177e4 2266 } else if (rtm->rtm_dst_len)
2d7202bf 2267 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2268#ifdef CONFIG_IPV6_SUBTREES
2269 if (src) {
2d7202bf 2270 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2271 rtm->rtm_src_len = 128;
1da177e4 2272 } else if (rtm->rtm_src_len)
2d7202bf 2273 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2274#endif
7bc570c8
YH
2275 if (iif) {
2276#ifdef CONFIG_IPV6_MROUTE
2277 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2278 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2279 if (err <= 0) {
2280 if (!nowait) {
2281 if (err == 0)
2282 return 0;
2283 goto nla_put_failure;
2284 } else {
2285 if (err == -EMSGSIZE)
2286 goto nla_put_failure;
2287 }
2288 }
2289 } else
2290#endif
2291 NLA_PUT_U32(skb, RTA_IIF, iif);
2292 } else if (dst) {
d8d1f30b 2293 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
1da177e4 2294 struct in6_addr saddr_buf;
191cd582 2295 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
7cbca67c 2296 dst, 0, &saddr_buf) == 0)
2d7202bf 2297 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2298 }
2d7202bf 2299
defb3519 2300 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2301 goto nla_put_failure;
2302
d8d1f30b
CG
2303 if (rt->dst.neighbour)
2304 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2d7202bf 2305
d8d1f30b 2306 if (rt->dst.dev)
2d7202bf
TG
2307 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2308
2309 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2310
36e3deae
YH
2311 if (!(rt->rt6i_flags & RTF_EXPIRES))
2312 expires = 0;
2313 else if (rt->rt6i_expires - jiffies < INT_MAX)
2314 expires = rt->rt6i_expires - jiffies;
2315 else
2316 expires = INT_MAX;
69cdf8f9 2317
d8d1f30b
CG
2318 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2319 expires, rt->dst.error) < 0)
e3703b3d 2320 goto nla_put_failure;
2d7202bf
TG
2321
2322 return nlmsg_end(skb, nlh);
2323
2324nla_put_failure:
26932566
PM
2325 nlmsg_cancel(skb, nlh);
2326 return -EMSGSIZE;
1da177e4
LT
2327}
2328
1b43af54 2329int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2330{
2331 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2332 int prefix;
2333
2d7202bf
TG
2334 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2335 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2336 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2337 } else
2338 prefix = 0;
2339
191cd582
BH
2340 return rt6_fill_node(arg->net,
2341 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2342 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2343 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2344}
2345
c127ea2c 2346static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2347{
3b1e0a65 2348 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2349 struct nlattr *tb[RTA_MAX+1];
2350 struct rt6_info *rt;
1da177e4 2351 struct sk_buff *skb;
ab364a6f 2352 struct rtmsg *rtm;
1da177e4 2353 struct flowi fl;
ab364a6f 2354 int err, iif = 0;
1da177e4 2355
ab364a6f
TG
2356 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2357 if (err < 0)
2358 goto errout;
1da177e4 2359
ab364a6f 2360 err = -EINVAL;
1da177e4 2361 memset(&fl, 0, sizeof(fl));
1da177e4 2362
ab364a6f
TG
2363 if (tb[RTA_SRC]) {
2364 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2365 goto errout;
2366
2367 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2368 }
2369
2370 if (tb[RTA_DST]) {
2371 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2372 goto errout;
2373
2374 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2375 }
2376
2377 if (tb[RTA_IIF])
2378 iif = nla_get_u32(tb[RTA_IIF]);
2379
2380 if (tb[RTA_OIF])
2381 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2382
2383 if (iif) {
2384 struct net_device *dev;
5578689a 2385 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2386 if (!dev) {
2387 err = -ENODEV;
ab364a6f 2388 goto errout;
1da177e4
LT
2389 }
2390 }
2391
ab364a6f
TG
2392 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2393 if (skb == NULL) {
2394 err = -ENOBUFS;
2395 goto errout;
2396 }
1da177e4 2397
ab364a6f
TG
2398 /* Reserve room for dummy headers, this skb can pass
2399 through good chunk of routing engine.
2400 */
459a98ed 2401 skb_reset_mac_header(skb);
ab364a6f 2402 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2403
8a3edd80 2404 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
d8d1f30b 2405 skb_dst_set(skb, &rt->dst);
1da177e4 2406
191cd582 2407 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2408 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2409 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2410 if (err < 0) {
ab364a6f
TG
2411 kfree_skb(skb);
2412 goto errout;
1da177e4
LT
2413 }
2414
5578689a 2415 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2416errout:
1da177e4 2417 return err;
1da177e4
LT
2418}
2419
86872cb5 2420void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2421{
2422 struct sk_buff *skb;
5578689a 2423 struct net *net = info->nl_net;
528c4ceb
DL
2424 u32 seq;
2425 int err;
2426
2427 err = -ENOBUFS;
2428 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2429
339bf98f 2430 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2431 if (skb == NULL)
2432 goto errout;
2433
191cd582 2434 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2435 event, info->pid, seq, 0, 0, 0);
26932566
PM
2436 if (err < 0) {
2437 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2438 WARN_ON(err == -EMSGSIZE);
2439 kfree_skb(skb);
2440 goto errout;
2441 }
1ce85fe4
PNA
2442 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2443 info->nlh, gfp_any());
2444 return;
21713ebc
TG
2445errout:
2446 if (err < 0)
5578689a 2447 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2448}
2449
8ed67789
DL
2450static int ip6_route_dev_notify(struct notifier_block *this,
2451 unsigned long event, void *data)
2452{
2453 struct net_device *dev = (struct net_device *)data;
c346dca1 2454 struct net *net = dev_net(dev);
8ed67789
DL
2455
2456 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2457 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2458 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2459#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2460 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2461 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2462 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2463 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2464#endif
2465 }
2466
2467 return NOTIFY_OK;
2468}
2469
1da177e4
LT
2470/*
2471 * /proc
2472 */
2473
2474#ifdef CONFIG_PROC_FS
2475
1da177e4
LT
/*
 * Buffer-cursor bookkeeping for /proc output.
 * NOTE(review): nothing in this part of the file references this
 * structure; confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2484
2485static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2486{
33120b30 2487 struct seq_file *m = p_arg;
1da177e4 2488
4b7a4274 2489 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2490
2491#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2492 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2493#else
33120b30 2494 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2495#endif
2496
2497 if (rt->rt6i_nexthop) {
4b7a4274 2498 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
1da177e4 2499 } else {
33120b30 2500 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2501 }
33120b30 2502 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2503 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2504 rt->dst.__use, rt->rt6i_flags,
33120b30 2505 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2506 return 0;
2507}
2508
33120b30 2509static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2510{
f3db4851
DL
2511 struct net *net = (struct net *)m->private;
2512 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2513 return 0;
2514}
1da177e4 2515
33120b30
AD
2516static int ipv6_route_open(struct inode *inode, struct file *file)
2517{
de05c557 2518 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2519}
2520
33120b30
AD
2521static const struct file_operations ipv6_route_proc_fops = {
2522 .owner = THIS_MODULE,
2523 .open = ipv6_route_open,
2524 .read = seq_read,
2525 .llseek = seq_lseek,
b6fcbdb4 2526 .release = single_release_net,
33120b30
AD
2527};
2528
1da177e4
LT
2529static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2530{
69ddb805 2531 struct net *net = (struct net *)seq->private;
1da177e4 2532 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2533 net->ipv6.rt6_stats->fib_nodes,
2534 net->ipv6.rt6_stats->fib_route_nodes,
2535 net->ipv6.rt6_stats->fib_rt_alloc,
2536 net->ipv6.rt6_stats->fib_rt_entries,
2537 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2538 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2539 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2540
2541 return 0;
2542}
2543
2544static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2545{
de05c557 2546 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2547}
2548
9a32144e 2549static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2550 .owner = THIS_MODULE,
2551 .open = rt6_stats_seq_open,
2552 .read = seq_read,
2553 .llseek = seq_lseek,
b6fcbdb4 2554 .release = single_release_net,
1da177e4
LT
2555};
2556#endif /* CONFIG_PROC_FS */
2557
2558#ifdef CONFIG_SYSCTL
2559
1da177e4 2560static
8d65af78 2561int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2562 void __user *buffer, size_t *lenp, loff_t *ppos)
2563{
5b7c931d
DL
2564 struct net *net = current->nsproxy->net_ns;
2565 int delay = net->ipv6.sysctl.flush_delay;
1da177e4 2566 if (write) {
8d65af78 2567 proc_dointvec(ctl, write, buffer, lenp, ppos);
5b7c931d 2568 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
1da177e4
LT
2569 return 0;
2570 } else
2571 return -EINVAL;
2572}
2573
760f2d01 2574ctl_table ipv6_route_table_template[] = {
1ab1457c 2575 {
1da177e4 2576 .procname = "flush",
4990509f 2577 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2578 .maxlen = sizeof(int),
89c8b3a1 2579 .mode = 0200,
6d9f239a 2580 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2581 },
2582 {
1da177e4 2583 .procname = "gc_thresh",
9a7ec3a9 2584 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2585 .maxlen = sizeof(int),
2586 .mode = 0644,
6d9f239a 2587 .proc_handler = proc_dointvec,
1da177e4
LT
2588 },
2589 {
1da177e4 2590 .procname = "max_size",
4990509f 2591 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2592 .maxlen = sizeof(int),
2593 .mode = 0644,
6d9f239a 2594 .proc_handler = proc_dointvec,
1da177e4
LT
2595 },
2596 {
1da177e4 2597 .procname = "gc_min_interval",
4990509f 2598 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2599 .maxlen = sizeof(int),
2600 .mode = 0644,
6d9f239a 2601 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2602 },
2603 {
1da177e4 2604 .procname = "gc_timeout",
4990509f 2605 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2606 .maxlen = sizeof(int),
2607 .mode = 0644,
6d9f239a 2608 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2609 },
2610 {
1da177e4 2611 .procname = "gc_interval",
4990509f 2612 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2613 .maxlen = sizeof(int),
2614 .mode = 0644,
6d9f239a 2615 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2616 },
2617 {
1da177e4 2618 .procname = "gc_elasticity",
4990509f 2619 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2620 .maxlen = sizeof(int),
2621 .mode = 0644,
f3d3f616 2622 .proc_handler = proc_dointvec,
1da177e4
LT
2623 },
2624 {
1da177e4 2625 .procname = "mtu_expires",
4990509f 2626 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2627 .maxlen = sizeof(int),
2628 .mode = 0644,
6d9f239a 2629 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2630 },
2631 {
1da177e4 2632 .procname = "min_adv_mss",
4990509f 2633 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2634 .maxlen = sizeof(int),
2635 .mode = 0644,
f3d3f616 2636 .proc_handler = proc_dointvec,
1da177e4
LT
2637 },
2638 {
1da177e4 2639 .procname = "gc_min_interval_ms",
4990509f 2640 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2641 .maxlen = sizeof(int),
2642 .mode = 0644,
6d9f239a 2643 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2644 },
f8572d8f 2645 { }
1da177e4
LT
2646};
2647
2c8c1e72 2648struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2649{
2650 struct ctl_table *table;
2651
2652 table = kmemdup(ipv6_route_table_template,
2653 sizeof(ipv6_route_table_template),
2654 GFP_KERNEL);
5ee09105
YH
2655
2656 if (table) {
2657 table[0].data = &net->ipv6.sysctl.flush_delay;
86393e52 2658 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2659 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2660 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2661 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2662 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2663 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2664 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2665 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2666 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2667 }
2668
760f2d01
DL
2669 return table;
2670}
1da177e4
LT
2671#endif
2672
2c8c1e72 2673static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2674{
633d424b 2675 int ret = -ENOMEM;
8ed67789 2676
86393e52
AD
2677 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2678 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2679
fc66f95c
ED
2680 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2681 goto out_ip6_dst_ops;
2682
8ed67789
DL
2683 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2684 sizeof(*net->ipv6.ip6_null_entry),
2685 GFP_KERNEL);
2686 if (!net->ipv6.ip6_null_entry)
fc66f95c 2687 goto out_ip6_dst_entries;
d8d1f30b 2688 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2689 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2690 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
defb3519 2691 dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255);
8ed67789
DL
2692
2693#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2694 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2695 sizeof(*net->ipv6.ip6_prohibit_entry),
2696 GFP_KERNEL);
68fffc67
PZ
2697 if (!net->ipv6.ip6_prohibit_entry)
2698 goto out_ip6_null_entry;
d8d1f30b 2699 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2700 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2701 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
defb3519 2702 dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255);
8ed67789
DL
2703
2704 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2705 sizeof(*net->ipv6.ip6_blk_hole_entry),
2706 GFP_KERNEL);
68fffc67
PZ
2707 if (!net->ipv6.ip6_blk_hole_entry)
2708 goto out_ip6_prohibit_entry;
d8d1f30b 2709 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2710 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2711 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
defb3519 2712 dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255);
8ed67789
DL
2713#endif
2714
b339a47c
PZ
2715 net->ipv6.sysctl.flush_delay = 0;
2716 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2717 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2718 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2719 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2720 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2721 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2722 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2723
cdb18761
DL
2724#ifdef CONFIG_PROC_FS
2725 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2726 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2727#endif
6891a346
BT
2728 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2729
8ed67789
DL
2730 ret = 0;
2731out:
2732 return ret;
f2fc6a54 2733
68fffc67
PZ
2734#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2735out_ip6_prohibit_entry:
2736 kfree(net->ipv6.ip6_prohibit_entry);
2737out_ip6_null_entry:
2738 kfree(net->ipv6.ip6_null_entry);
2739#endif
fc66f95c
ED
2740out_ip6_dst_entries:
2741 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2742out_ip6_dst_ops:
f2fc6a54 2743 goto out;
cdb18761
DL
2744}
2745
2c8c1e72 2746static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2747{
2748#ifdef CONFIG_PROC_FS
2749 proc_net_remove(net, "ipv6_route");
2750 proc_net_remove(net, "rt6_stats");
2751#endif
8ed67789
DL
2752 kfree(net->ipv6.ip6_null_entry);
2753#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2754 kfree(net->ipv6.ip6_prohibit_entry);
2755 kfree(net->ipv6.ip6_blk_hole_entry);
2756#endif
41bb78b4 2757 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2758}
2759
2760static struct pernet_operations ip6_route_net_ops = {
2761 .init = ip6_route_net_init,
2762 .exit = ip6_route_net_exit,
2763};
2764
8ed67789
DL
2765static struct notifier_block ip6_route_dev_notifier = {
2766 .notifier_call = ip6_route_dev_notify,
2767 .priority = 0,
2768};
2769
433d49c3 2770int __init ip6_route_init(void)
1da177e4 2771{
433d49c3
DL
2772 int ret;
2773
9a7ec3a9
DL
2774 ret = -ENOMEM;
2775 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2776 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2777 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2778 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2779 goto out;
14e50e57 2780
fc66f95c 2781 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2782 if (ret)
bdb3289f 2783 goto out_kmem_cache;
bdb3289f 2784
fc66f95c
ED
2785 ret = register_pernet_subsys(&ip6_route_net_ops);
2786 if (ret)
2787 goto out_dst_entries;
2788
5dc121e9
AE
2789 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2790
8ed67789
DL
2791 /* Registering of the loopback is done before this portion of code,
2792 * the loopback reference in rt6_info will not be taken, do it
2793 * manually for init_net */
d8d1f30b 2794 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2795 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2796 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2797 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2798 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2799 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2800 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2801 #endif
433d49c3
DL
2802 ret = fib6_init();
2803 if (ret)
8ed67789 2804 goto out_register_subsys;
433d49c3 2805
433d49c3
DL
2806 ret = xfrm6_init();
2807 if (ret)
cdb18761 2808 goto out_fib6_init;
c35b7e72 2809
433d49c3
DL
2810 ret = fib6_rules_init();
2811 if (ret)
2812 goto xfrm6_init;
7e5449c2 2813
433d49c3
DL
2814 ret = -ENOBUFS;
2815 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2816 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2817 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2818 goto fib6_rules_init;
c127ea2c 2819
8ed67789 2820 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2821 if (ret)
2822 goto fib6_rules_init;
8ed67789 2823
433d49c3
DL
2824out:
2825 return ret;
2826
2827fib6_rules_init:
433d49c3
DL
2828 fib6_rules_cleanup();
2829xfrm6_init:
433d49c3 2830 xfrm6_fini();
433d49c3 2831out_fib6_init:
433d49c3 2832 fib6_gc_cleanup();
8ed67789
DL
2833out_register_subsys:
2834 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
2835out_dst_entries:
2836 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 2837out_kmem_cache:
f2fc6a54 2838 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 2839 goto out;
1da177e4
LT
2840}
2841
2842void ip6_route_cleanup(void)
2843{
8ed67789 2844 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 2845 fib6_rules_cleanup();
1da177e4 2846 xfrm6_fini();
1da177e4 2847 fib6_gc_cleanup();
8ed67789 2848 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 2849 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 2850 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 2851}