]> git.ipfire.org Git - thirdparty/kernel/stable.git/blame - net/ipv6/route.c
e1000e: Need to include vmalloc.h
[thirdparty/kernel/stable.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4 28#include <linux/errno.h>
bc3b2d7f 29#include <linux/export.h>
1da177e4
LT
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
7bc570c8 38#include <linux/mroute6.h>
1da177e4 39#include <linux/init.h>
1da177e4 40#include <linux/if_arp.h>
1da177e4
LT
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
5b7c931d 43#include <linux/nsproxy.h>
5a0e3ad6 44#include <linux/slab.h>
457c4cbc 45#include <net/net_namespace.h>
1da177e4
LT
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
8d71740c 56#include <net/netevent.h>
21713ebc 57#include <net/netlink.h>
1da177e4
LT
58
59#include <asm/uaccess.h>
60
61#ifdef CONFIG_SYSCTL
62#include <linux/sysctl.h>
63#endif
64
21efcfa0
ED
65static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
66 const struct in6_addr *dest);
1da177e4 67static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 68static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 69static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
70static struct dst_entry *ip6_negative_advice(struct dst_entry *);
71static void ip6_dst_destroy(struct dst_entry *);
72static void ip6_dst_ifdown(struct dst_entry *,
73 struct net_device *dev, int how);
569d3645 74static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
75
76static int ip6_pkt_discard(struct sk_buff *skb);
77static int ip6_pkt_discard_out(struct sk_buff *skb);
78static void ip6_link_failure(struct sk_buff *skb);
79static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
80
70ceb4f5 81#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 82static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
83 const struct in6_addr *prefix, int prefixlen,
84 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5 85 unsigned pref);
efa2cea0 86static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
87 const struct in6_addr *prefix, int prefixlen,
88 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
89#endif
90
06582540
DM
91static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
92{
93 struct rt6_info *rt = (struct rt6_info *) dst;
94 struct inet_peer *peer;
95 u32 *p = NULL;
96
8e2ec639
YZ
97 if (!(rt->dst.flags & DST_HOST))
98 return NULL;
99
06582540
DM
100 if (!rt->rt6i_peer)
101 rt6_bind_peer(rt, 1);
102
103 peer = rt->rt6i_peer;
104 if (peer) {
105 u32 *old_p = __DST_METRICS_PTR(old);
106 unsigned long prev, new;
107
108 p = peer->metrics;
109 if (inet_metrics_new(peer))
110 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
111
112 new = (unsigned long) p;
113 prev = cmpxchg(&dst->_metrics, old, new);
114
115 if (prev != old) {
116 p = __DST_METRICS_PTR(prev);
117 if (prev & DST_METRICS_READ_ONLY)
118 p = NULL;
119 }
120 }
121 return p;
122}
123
39232973
DM
124static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
125{
126 struct in6_addr *p = &rt->rt6i_gateway;
127
128 if (p->s6_addr32[0] | p->s6_addr32[1] |
129 p->s6_addr32[2] | p->s6_addr32[3])
130 return (const void *) p;
131 return daddr;
132}
133
d3aaeb38
DM
134static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
135{
39232973
DM
136 struct rt6_info *rt = (struct rt6_info *) dst;
137 struct neighbour *n;
138
139 daddr = choose_neigh_daddr(rt, daddr);
140 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
f83c7790
DM
141 if (n)
142 return n;
143 return neigh_create(&nd_tbl, daddr, dst->dev);
144}
145
8ade06c6 146static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
f83c7790 147{
8ade06c6
DM
148 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
149 if (!n) {
150 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
151 if (IS_ERR(n))
152 return PTR_ERR(n);
153 }
f83c7790
DM
154 dst_set_neighbour(&rt->dst, n);
155
156 return 0;
d3aaeb38
DM
157}
158
9a7ec3a9 159static struct dst_ops ip6_dst_ops_template = {
1da177e4 160 .family = AF_INET6,
09640e63 161 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
162 .gc = ip6_dst_gc,
163 .gc_thresh = 1024,
164 .check = ip6_dst_check,
0dbaee3b 165 .default_advmss = ip6_default_advmss,
ebb762f2 166 .mtu = ip6_mtu,
06582540 167 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
168 .destroy = ip6_dst_destroy,
169 .ifdown = ip6_dst_ifdown,
170 .negative_advice = ip6_negative_advice,
171 .link_failure = ip6_link_failure,
172 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 173 .local_out = __ip6_local_out,
d3aaeb38 174 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
175};
176
ebb762f2 177static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 178{
618f9bc7
SK
179 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
180
181 return mtu ? : dst->dev->mtu;
ec831ea7
RD
182}
183
14e50e57
DM
184static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
185{
186}
187
0972ddb2
HB
188static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
189 unsigned long old)
190{
191 return NULL;
192}
193
14e50e57
DM
194static struct dst_ops ip6_dst_blackhole_ops = {
195 .family = AF_INET6,
09640e63 196 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
197 .destroy = ip6_dst_destroy,
198 .check = ip6_dst_check,
ebb762f2 199 .mtu = ip6_blackhole_mtu,
214f45c9 200 .default_advmss = ip6_default_advmss,
14e50e57 201 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 202 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 203 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
204};
205
62fa8a84
DM
206static const u32 ip6_template_metrics[RTAX_MAX] = {
207 [RTAX_HOPLIMIT - 1] = 255,
208};
209
bdb3289f 210static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
211 .dst = {
212 .__refcnt = ATOMIC_INIT(1),
213 .__use = 1,
214 .obsolete = -1,
215 .error = -ENETUNREACH,
d8d1f30b
CG
216 .input = ip6_pkt_discard,
217 .output = ip6_pkt_discard_out,
1da177e4
LT
218 },
219 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 220 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
221 .rt6i_metric = ~(u32) 0,
222 .rt6i_ref = ATOMIC_INIT(1),
223};
224
101367c2
TG
225#ifdef CONFIG_IPV6_MULTIPLE_TABLES
226
6723ab54
DM
227static int ip6_pkt_prohibit(struct sk_buff *skb);
228static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 229
280a34c8 230static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
231 .dst = {
232 .__refcnt = ATOMIC_INIT(1),
233 .__use = 1,
234 .obsolete = -1,
235 .error = -EACCES,
d8d1f30b
CG
236 .input = ip6_pkt_prohibit,
237 .output = ip6_pkt_prohibit_out,
101367c2
TG
238 },
239 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 240 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
241 .rt6i_metric = ~(u32) 0,
242 .rt6i_ref = ATOMIC_INIT(1),
243};
244
bdb3289f 245static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
246 .dst = {
247 .__refcnt = ATOMIC_INIT(1),
248 .__use = 1,
249 .obsolete = -1,
250 .error = -EINVAL,
d8d1f30b
CG
251 .input = dst_discard,
252 .output = dst_discard,
101367c2
TG
253 },
254 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 255 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
256 .rt6i_metric = ~(u32) 0,
257 .rt6i_ref = ATOMIC_INIT(1),
258};
259
260#endif
261
1da177e4 262/* allocate dst with ip6_dst_ops */
5c1e6aa3 263static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
957c665f
DM
264 struct net_device *dev,
265 int flags)
1da177e4 266{
957c665f 267 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
cf911662 268
38308473 269 if (rt)
fbe58186 270 memset(&rt->rt6i_table, 0,
38308473 271 sizeof(*rt) - sizeof(struct dst_entry));
cf911662
DM
272
273 return rt;
1da177e4
LT
274}
275
276static void ip6_dst_destroy(struct dst_entry *dst)
277{
278 struct rt6_info *rt = (struct rt6_info *)dst;
279 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 280 struct inet_peer *peer = rt->rt6i_peer;
1da177e4 281
8e2ec639
YZ
282 if (!(rt->dst.flags & DST_HOST))
283 dst_destroy_metrics_generic(dst);
284
38308473 285 if (idev) {
1da177e4
LT
286 rt->rt6i_idev = NULL;
287 in6_dev_put(idev);
1ab1457c 288 }
b3419363 289 if (peer) {
b3419363
DM
290 rt->rt6i_peer = NULL;
291 inet_putpeer(peer);
292 }
293}
294
6431cbc2
DM
295static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
296
297static u32 rt6_peer_genid(void)
298{
299 return atomic_read(&__rt6_peer_genid);
300}
301
b3419363
DM
302void rt6_bind_peer(struct rt6_info *rt, int create)
303{
304 struct inet_peer *peer;
305
b3419363
DM
306 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
307 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
308 inet_putpeer(peer);
6431cbc2
DM
309 else
310 rt->rt6i_peer_genid = rt6_peer_genid();
1da177e4
LT
311}
312
313static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
314 int how)
315{
316 struct rt6_info *rt = (struct rt6_info *)dst;
317 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 318 struct net_device *loopback_dev =
c346dca1 319 dev_net(dev)->loopback_dev;
1da177e4 320
38308473 321 if (dev != loopback_dev && idev && idev->dev == dev) {
5a3e55d6
DL
322 struct inet6_dev *loopback_idev =
323 in6_dev_get(loopback_dev);
38308473 324 if (loopback_idev) {
1da177e4
LT
325 rt->rt6i_idev = loopback_idev;
326 in6_dev_put(idev);
327 }
328 }
329}
330
331static __inline__ int rt6_check_expired(const struct rt6_info *rt)
332{
a02cec21 333 return (rt->rt6i_flags & RTF_EXPIRES) &&
d1918542 334 time_after(jiffies, rt->dst.expires);
1da177e4
LT
335}
336
b71d1d42 337static inline int rt6_need_strict(const struct in6_addr *daddr)
c71099ac 338{
a02cec21
ED
339 return ipv6_addr_type(daddr) &
340 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
341}
342
1da177e4 343/*
c71099ac 344 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
345 */
346
8ed67789
DL
347static inline struct rt6_info *rt6_device_match(struct net *net,
348 struct rt6_info *rt,
b71d1d42 349 const struct in6_addr *saddr,
1da177e4 350 int oif,
d420895e 351 int flags)
1da177e4
LT
352{
353 struct rt6_info *local = NULL;
354 struct rt6_info *sprt;
355
dd3abc4e
YH
356 if (!oif && ipv6_addr_any(saddr))
357 goto out;
358
d8d1f30b 359 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 360 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
361
362 if (oif) {
1da177e4
LT
363 if (dev->ifindex == oif)
364 return sprt;
365 if (dev->flags & IFF_LOOPBACK) {
38308473 366 if (!sprt->rt6i_idev ||
1da177e4 367 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 368 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 369 continue;
1ab1457c 370 if (local && (!oif ||
1da177e4
LT
371 local->rt6i_idev->dev->ifindex == oif))
372 continue;
373 }
374 local = sprt;
375 }
dd3abc4e
YH
376 } else {
377 if (ipv6_chk_addr(net, saddr, dev,
378 flags & RT6_LOOKUP_F_IFACE))
379 return sprt;
1da177e4 380 }
dd3abc4e 381 }
1da177e4 382
dd3abc4e 383 if (oif) {
1da177e4
LT
384 if (local)
385 return local;
386
d420895e 387 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 388 return net->ipv6.ip6_null_entry;
1da177e4 389 }
dd3abc4e 390out:
1da177e4
LT
391 return rt;
392}
393
27097255
YH
394#ifdef CONFIG_IPV6_ROUTER_PREF
395static void rt6_probe(struct rt6_info *rt)
396{
f2c31e32 397 struct neighbour *neigh;
27097255
YH
398 /*
399 * Okay, this does not seem to be appropriate
400 * for now, however, we need to check if it
401 * is really so; aka Router Reachability Probing.
402 *
403 * Router Reachability Probe MUST be rate-limited
404 * to no more than one per minute.
405 */
f2c31e32 406 rcu_read_lock();
27217455 407 neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
27097255 408 if (!neigh || (neigh->nud_state & NUD_VALID))
f2c31e32 409 goto out;
27097255
YH
410 read_lock_bh(&neigh->lock);
411 if (!(neigh->nud_state & NUD_VALID) &&
52e16356 412 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
27097255
YH
413 struct in6_addr mcaddr;
414 struct in6_addr *target;
415
416 neigh->updated = jiffies;
417 read_unlock_bh(&neigh->lock);
418
419 target = (struct in6_addr *)&neigh->primary_key;
420 addrconf_addr_solict_mult(target, &mcaddr);
d1918542 421 ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
f2c31e32 422 } else {
27097255 423 read_unlock_bh(&neigh->lock);
f2c31e32
ED
424 }
425out:
426 rcu_read_unlock();
27097255
YH
427}
428#else
/* Without CONFIG_IPV6_ROUTER_PREF no reachability probing is done. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
432#endif
433
1da177e4 434/*
554cfb7e 435 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 436 */
b6f99a21 437static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 438{
d1918542 439 struct net_device *dev = rt->dst.dev;
161980f4 440 if (!oif || dev->ifindex == oif)
554cfb7e 441 return 2;
161980f4
DM
442 if ((dev->flags & IFF_LOOPBACK) &&
443 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
444 return 1;
445 return 0;
554cfb7e 446}
1da177e4 447
b6f99a21 448static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 449{
f2c31e32 450 struct neighbour *neigh;
398bcbeb 451 int m;
f2c31e32
ED
452
453 rcu_read_lock();
27217455 454 neigh = dst_get_neighbour_noref(&rt->dst);
4d0c5911
YH
455 if (rt->rt6i_flags & RTF_NONEXTHOP ||
456 !(rt->rt6i_flags & RTF_GATEWAY))
457 m = 1;
458 else if (neigh) {
554cfb7e
YH
459 read_lock_bh(&neigh->lock);
460 if (neigh->nud_state & NUD_VALID)
4d0c5911 461 m = 2;
398bcbeb
YH
462#ifdef CONFIG_IPV6_ROUTER_PREF
463 else if (neigh->nud_state & NUD_FAILED)
464 m = 0;
465#endif
466 else
ea73ee23 467 m = 1;
554cfb7e 468 read_unlock_bh(&neigh->lock);
398bcbeb
YH
469 } else
470 m = 0;
f2c31e32 471 rcu_read_unlock();
554cfb7e 472 return m;
1da177e4
LT
473}
474
554cfb7e
YH
475static int rt6_score_route(struct rt6_info *rt, int oif,
476 int strict)
1da177e4 477{
4d0c5911 478 int m, n;
1ab1457c 479
4d0c5911 480 m = rt6_check_dev(rt, oif);
77d16f45 481 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 482 return -1;
ebacaaa0
YH
483#ifdef CONFIG_IPV6_ROUTER_PREF
484 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
485#endif
4d0c5911 486 n = rt6_check_neigh(rt);
557e92ef 487 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
488 return -1;
489 return m;
490}
491
f11e6659
DM
492static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
493 int *mpri, struct rt6_info *match)
554cfb7e 494{
f11e6659
DM
495 int m;
496
497 if (rt6_check_expired(rt))
498 goto out;
499
500 m = rt6_score_route(rt, oif, strict);
501 if (m < 0)
502 goto out;
503
504 if (m > *mpri) {
505 if (strict & RT6_LOOKUP_F_REACHABLE)
506 rt6_probe(match);
507 *mpri = m;
508 match = rt;
509 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
510 rt6_probe(rt);
511 }
512
513out:
514 return match;
515}
516
517static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
518 struct rt6_info *rr_head,
519 u32 metric, int oif, int strict)
520{
521 struct rt6_info *rt, *match;
554cfb7e 522 int mpri = -1;
1da177e4 523
f11e6659
DM
524 match = NULL;
525 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 526 rt = rt->dst.rt6_next)
f11e6659
DM
527 match = find_match(rt, oif, strict, &mpri, match);
528 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 529 rt = rt->dst.rt6_next)
f11e6659 530 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 531
f11e6659
DM
532 return match;
533}
1da177e4 534
f11e6659
DM
535static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
536{
537 struct rt6_info *match, *rt0;
8ed67789 538 struct net *net;
1da177e4 539
f11e6659
DM
540 rt0 = fn->rr_ptr;
541 if (!rt0)
542 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 543
f11e6659 544 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 545
554cfb7e 546 if (!match &&
f11e6659 547 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 548 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 549
554cfb7e 550 /* no entries matched; do round-robin */
f11e6659
DM
551 if (!next || next->rt6i_metric != rt0->rt6i_metric)
552 next = fn->leaf;
553
554 if (next != rt0)
555 fn->rr_ptr = next;
1da177e4 556 }
1da177e4 557
d1918542 558 net = dev_net(rt0->dst.dev);
a02cec21 559 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
560}
561
70ceb4f5
YH
562#ifdef CONFIG_IPV6_ROUTE_INFO
563int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 564 const struct in6_addr *gwaddr)
70ceb4f5 565{
c346dca1 566 struct net *net = dev_net(dev);
70ceb4f5
YH
567 struct route_info *rinfo = (struct route_info *) opt;
568 struct in6_addr prefix_buf, *prefix;
569 unsigned int pref;
4bed72e4 570 unsigned long lifetime;
70ceb4f5
YH
571 struct rt6_info *rt;
572
573 if (len < sizeof(struct route_info)) {
574 return -EINVAL;
575 }
576
577 /* Sanity check for prefix_len and length */
578 if (rinfo->length > 3) {
579 return -EINVAL;
580 } else if (rinfo->prefix_len > 128) {
581 return -EINVAL;
582 } else if (rinfo->prefix_len > 64) {
583 if (rinfo->length < 2) {
584 return -EINVAL;
585 }
586 } else if (rinfo->prefix_len > 0) {
587 if (rinfo->length < 1) {
588 return -EINVAL;
589 }
590 }
591
592 pref = rinfo->route_pref;
593 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 594 return -EINVAL;
70ceb4f5 595
4bed72e4 596 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
597
598 if (rinfo->length == 3)
599 prefix = (struct in6_addr *)rinfo->prefix;
600 else {
601 /* this function is safe */
602 ipv6_addr_prefix(&prefix_buf,
603 (struct in6_addr *)rinfo->prefix,
604 rinfo->prefix_len);
605 prefix = &prefix_buf;
606 }
607
efa2cea0
DL
608 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
609 dev->ifindex);
70ceb4f5
YH
610
611 if (rt && !lifetime) {
e0a1ad73 612 ip6_del_rt(rt);
70ceb4f5
YH
613 rt = NULL;
614 }
615
616 if (!rt && lifetime)
efa2cea0 617 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
618 pref);
619 else if (rt)
620 rt->rt6i_flags = RTF_ROUTEINFO |
621 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
622
623 if (rt) {
4bed72e4 624 if (!addrconf_finite_timeout(lifetime)) {
70ceb4f5
YH
625 rt->rt6i_flags &= ~RTF_EXPIRES;
626 } else {
d1918542 627 rt->dst.expires = jiffies + HZ * lifetime;
70ceb4f5
YH
628 rt->rt6i_flags |= RTF_EXPIRES;
629 }
d8d1f30b 630 dst_release(&rt->dst);
70ceb4f5
YH
631 }
632 return 0;
633}
634#endif
635
/*
 * BACKTRACK - climb the fib6 tree when the current lookup hit the null
 * entry.
 *
 * Operates on the caller's local variables "rt" and "fn" and jumps to
 * the caller's labels "out" (top-level root reached) and "restart"
 * (a node flagged RTN_RTINFO was found).  When the parent has a subtree
 * other than the current node, the lookup continues inside that subtree
 * using @saddr.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *__parent;				\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			__parent = fn->parent;				\
			if (FIB6_SUBTREE(__parent) &&			\
			    FIB6_SUBTREE(__parent) != fn)		\
				fn = fib6_lookup(FIB6_SUBTREE(__parent),	\
						 NULL, saddr);		\
			else						\
				fn = __parent;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 653
8ed67789
DL
654static struct rt6_info *ip6_pol_route_lookup(struct net *net,
655 struct fib6_table *table,
4c9483b2 656 struct flowi6 *fl6, int flags)
1da177e4
LT
657{
658 struct fib6_node *fn;
659 struct rt6_info *rt;
660
c71099ac 661 read_lock_bh(&table->tb6_lock);
4c9483b2 662 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
663restart:
664 rt = fn->leaf;
4c9483b2
DM
665 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
666 BACKTRACK(net, &fl6->saddr);
c71099ac 667out:
d8d1f30b 668 dst_use(&rt->dst, jiffies);
c71099ac 669 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
670 return rt;
671
672}
673
ea6e574e
FW
674struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
675 int flags)
676{
677 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
678}
679EXPORT_SYMBOL_GPL(ip6_route_lookup);
680
9acd9f3a
YH
681struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
682 const struct in6_addr *saddr, int oif, int strict)
c71099ac 683{
4c9483b2
DM
684 struct flowi6 fl6 = {
685 .flowi6_oif = oif,
686 .daddr = *daddr,
c71099ac
TG
687 };
688 struct dst_entry *dst;
77d16f45 689 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 690
adaa70bb 691 if (saddr) {
4c9483b2 692 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
693 flags |= RT6_LOOKUP_F_HAS_SADDR;
694 }
695
4c9483b2 696 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
697 if (dst->error == 0)
698 return (struct rt6_info *) dst;
699
700 dst_release(dst);
701
1da177e4
LT
702 return NULL;
703}
704
7159039a
YH
705EXPORT_SYMBOL(rt6_lookup);
706
c71099ac 707/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
708 It takes new route entry, the addition fails by any reason the
709 route is freed. In any case, if caller does not hold it, it may
710 be destroyed.
711 */
712
86872cb5 713static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
714{
715 int err;
c71099ac 716 struct fib6_table *table;
1da177e4 717
c71099ac
TG
718 table = rt->rt6i_table;
719 write_lock_bh(&table->tb6_lock);
86872cb5 720 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 721 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
722
723 return err;
724}
725
40e22e8f
TG
726int ip6_ins_rt(struct rt6_info *rt)
727{
4d1169c1 728 struct nl_info info = {
d1918542 729 .nl_net = dev_net(rt->dst.dev),
4d1169c1 730 };
528c4ceb 731 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
732}
733
21efcfa0
ED
734static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
735 const struct in6_addr *daddr,
b71d1d42 736 const struct in6_addr *saddr)
1da177e4 737{
1da177e4
LT
738 struct rt6_info *rt;
739
740 /*
741 * Clone the route.
742 */
743
21efcfa0 744 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
745
746 if (rt) {
14deae41
DM
747 int attempts = !in_softirq();
748
38308473 749 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 750 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 751 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 752 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 753 rt->rt6i_gateway = *daddr;
58c4fb86 754 }
1da177e4 755
1da177e4 756 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
757
758#ifdef CONFIG_IPV6_SUBTREES
759 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 760 rt->rt6i_src.addr = *saddr;
1da177e4
LT
761 rt->rt6i_src.plen = 128;
762 }
763#endif
764
14deae41 765 retry:
8ade06c6 766 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
d1918542 767 struct net *net = dev_net(rt->dst.dev);
14deae41
DM
768 int saved_rt_min_interval =
769 net->ipv6.sysctl.ip6_rt_gc_min_interval;
770 int saved_rt_elasticity =
771 net->ipv6.sysctl.ip6_rt_gc_elasticity;
772
773 if (attempts-- > 0) {
774 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
775 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
776
86393e52 777 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
778
779 net->ipv6.sysctl.ip6_rt_gc_elasticity =
780 saved_rt_elasticity;
781 net->ipv6.sysctl.ip6_rt_gc_min_interval =
782 saved_rt_min_interval;
783 goto retry;
784 }
785
786 if (net_ratelimit())
787 printk(KERN_WARNING
7e1b33e5 788 "ipv6: Neighbour table overflow.\n");
d8d1f30b 789 dst_free(&rt->dst);
14deae41
DM
790 return NULL;
791 }
95a9a5ba 792 }
1da177e4 793
95a9a5ba
YH
794 return rt;
795}
1da177e4 796
21efcfa0
ED
797static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
798 const struct in6_addr *daddr)
299d9939 799{
21efcfa0
ED
800 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
801
299d9939 802 if (rt) {
299d9939 803 rt->rt6i_flags |= RTF_CACHE;
27217455 804 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
299d9939
YH
805 }
806 return rt;
807}
808
8ed67789 809static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 810 struct flowi6 *fl6, int flags)
1da177e4
LT
811{
812 struct fib6_node *fn;
519fbd87 813 struct rt6_info *rt, *nrt;
c71099ac 814 int strict = 0;
1da177e4 815 int attempts = 3;
519fbd87 816 int err;
53b7997f 817 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 818
77d16f45 819 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
820
821relookup:
c71099ac 822 read_lock_bh(&table->tb6_lock);
1da177e4 823
8238dd06 824restart_2:
4c9483b2 825 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
826
827restart:
4acad72d 828 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 829
4c9483b2 830 BACKTRACK(net, &fl6->saddr);
8ed67789 831 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 832 rt->rt6i_flags & RTF_CACHE)
1ddef044 833 goto out;
1da177e4 834
d8d1f30b 835 dst_hold(&rt->dst);
c71099ac 836 read_unlock_bh(&table->tb6_lock);
fb9de91e 837
27217455 838 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 839 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 840 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 841 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
842 else
843 goto out2;
e40cf353 844
d8d1f30b 845 dst_release(&rt->dst);
8ed67789 846 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 847
d8d1f30b 848 dst_hold(&rt->dst);
519fbd87 849 if (nrt) {
40e22e8f 850 err = ip6_ins_rt(nrt);
519fbd87 851 if (!err)
1da177e4 852 goto out2;
1da177e4 853 }
1da177e4 854
519fbd87
YH
855 if (--attempts <= 0)
856 goto out2;
857
858 /*
c71099ac 859 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
860 * released someone could insert this route. Relookup.
861 */
d8d1f30b 862 dst_release(&rt->dst);
519fbd87
YH
863 goto relookup;
864
865out:
8238dd06
YH
866 if (reachable) {
867 reachable = 0;
868 goto restart_2;
869 }
d8d1f30b 870 dst_hold(&rt->dst);
c71099ac 871 read_unlock_bh(&table->tb6_lock);
1da177e4 872out2:
d8d1f30b
CG
873 rt->dst.lastuse = jiffies;
874 rt->dst.__use++;
c71099ac
TG
875
876 return rt;
1da177e4
LT
877}
878
8ed67789 879static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 880 struct flowi6 *fl6, int flags)
4acad72d 881{
4c9483b2 882 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
883}
884
c71099ac
TG
885void ip6_route_input(struct sk_buff *skb)
886{
b71d1d42 887 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 888 struct net *net = dev_net(skb->dev);
adaa70bb 889 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
890 struct flowi6 fl6 = {
891 .flowi6_iif = skb->dev->ifindex,
892 .daddr = iph->daddr,
893 .saddr = iph->saddr,
38308473 894 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
895 .flowi6_mark = skb->mark,
896 .flowi6_proto = iph->nexthdr,
c71099ac 897 };
adaa70bb 898
1d6e55f1 899 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 900 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 901
4c9483b2 902 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
c71099ac
TG
903}
904
8ed67789 905static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 906 struct flowi6 *fl6, int flags)
1da177e4 907{
4c9483b2 908 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
909}
910
9c7a4f9c 911struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 912 struct flowi6 *fl6)
c71099ac
TG
913{
914 int flags = 0;
915
4c9483b2 916 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 917 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 918
4c9483b2 919 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 920 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
921 else if (sk)
922 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 923
4c9483b2 924 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
925}
926
7159039a 927EXPORT_SYMBOL(ip6_route_output);
1da177e4 928
2774c131 929struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 930{
5c1e6aa3 931 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
932 struct dst_entry *new = NULL;
933
5c1e6aa3 934 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 935 if (rt) {
cf911662
DM
936 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
937
d8d1f30b 938 new = &rt->dst;
14e50e57 939
14e50e57 940 new->__use = 1;
352e512c
HX
941 new->input = dst_discard;
942 new->output = dst_discard;
14e50e57 943
21efcfa0
ED
944 if (dst_metrics_read_only(&ort->dst))
945 new->_metrics = ort->dst._metrics;
946 else
947 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
948 rt->rt6i_idev = ort->rt6i_idev;
949 if (rt->rt6i_idev)
950 in6_dev_hold(rt->rt6i_idev);
d1918542 951 rt->dst.expires = 0;
14e50e57 952
4e3fd7a0 953 rt->rt6i_gateway = ort->rt6i_gateway;
14e50e57
DM
954 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
955 rt->rt6i_metric = 0;
956
957 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
958#ifdef CONFIG_IPV6_SUBTREES
959 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
960#endif
961
962 dst_free(new);
963 }
964
69ead7af
DM
965 dst_release(dst_orig);
966 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 967}
14e50e57 968
1da177e4
LT
969/*
970 * Destination cache support functions
971 */
972
973static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
974{
975 struct rt6_info *rt;
976
977 rt = (struct rt6_info *) dst;
978
6431cbc2
DM
979 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
980 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
981 if (!rt->rt6i_peer)
982 rt6_bind_peer(rt, 0);
983 rt->rt6i_peer_genid = rt6_peer_genid();
984 }
1da177e4 985 return dst;
6431cbc2 986 }
1da177e4
LT
987 return NULL;
988}
989
990static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
991{
992 struct rt6_info *rt = (struct rt6_info *) dst;
993
994 if (rt) {
54c1a859
YH
995 if (rt->rt6i_flags & RTF_CACHE) {
996 if (rt6_check_expired(rt)) {
997 ip6_del_rt(rt);
998 dst = NULL;
999 }
1000 } else {
1da177e4 1001 dst_release(dst);
54c1a859
YH
1002 dst = NULL;
1003 }
1da177e4 1004 }
54c1a859 1005 return dst;
1da177e4
LT
1006}
1007
1008static void ip6_link_failure(struct sk_buff *skb)
1009{
1010 struct rt6_info *rt;
1011
3ffe533c 1012 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1013
adf30907 1014 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1015 if (rt) {
38308473 1016 if (rt->rt6i_flags & RTF_CACHE) {
d8d1f30b 1017 dst_set_expires(&rt->dst, 0);
1da177e4
LT
1018 rt->rt6i_flags |= RTF_EXPIRES;
1019 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1020 rt->rt6i_node->fn_sernum = -1;
1021 }
1022}
1023
1024static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1025{
1026 struct rt6_info *rt6 = (struct rt6_info*)dst;
1027
1028 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1029 rt6->rt6i_flags |= RTF_MODIFIED;
1030 if (mtu < IPV6_MIN_MTU) {
defb3519 1031 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1032 mtu = IPV6_MIN_MTU;
defb3519
DM
1033 features |= RTAX_FEATURE_ALLFRAG;
1034 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1035 }
defb3519 1036 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
1037 }
1038}
1039
0dbaee3b 1040static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1041{
0dbaee3b
DM
1042 struct net_device *dev = dst->dev;
1043 unsigned int mtu = dst_mtu(dst);
1044 struct net *net = dev_net(dev);
1045
1da177e4
LT
1046 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1047
5578689a
DL
1048 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1049 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1050
1051 /*
1ab1457c
YH
1052 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1053 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1054 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1055 * rely only on pmtu discovery"
1056 */
1057 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1058 mtu = IPV6_MAXPLEN;
1059 return mtu;
1060}
1061
ebb762f2 1062static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1063{
d33e4553 1064 struct inet6_dev *idev;
618f9bc7
SK
1065 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1066
1067 if (mtu)
1068 return mtu;
1069
1070 mtu = IPV6_MIN_MTU;
d33e4553
DM
1071
1072 rcu_read_lock();
1073 idev = __in6_dev_get(dst->dev);
1074 if (idev)
1075 mtu = idev->cnf.mtu6;
1076 rcu_read_unlock();
1077
1078 return mtu;
1079}
1080
3b00944c
YH
1081static struct dst_entry *icmp6_dst_gc_list;
1082static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1083
3b00944c 1084struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1085 struct neighbour *neigh,
87a11578 1086 struct flowi6 *fl6)
1da177e4 1087{
87a11578 1088 struct dst_entry *dst;
1da177e4
LT
1089 struct rt6_info *rt;
1090 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1091 struct net *net = dev_net(dev);
1da177e4 1092
38308473 1093 if (unlikely(!idev))
1da177e4
LT
1094 return NULL;
1095
957c665f 1096 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
38308473 1097 if (unlikely(!rt)) {
1da177e4 1098 in6_dev_put(idev);
87a11578 1099 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1100 goto out;
1101 }
1102
1da177e4
LT
1103 if (neigh)
1104 neigh_hold(neigh);
14deae41 1105 else {
f83c7790 1106 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
b43faac6 1107 if (IS_ERR(neigh)) {
252c3d84 1108 in6_dev_put(idev);
b43faac6
DM
1109 dst_free(&rt->dst);
1110 return ERR_CAST(neigh);
1111 }
14deae41 1112 }
1da177e4 1113
8e2ec639
YZ
1114 rt->dst.flags |= DST_HOST;
1115 rt->dst.output = ip6_output;
69cce1d1 1116 dst_set_neighbour(&rt->dst, neigh);
d8d1f30b 1117 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1118 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1119 rt->rt6i_dst.plen = 128;
1120 rt->rt6i_idev = idev;
7011687f 1121 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1122
3b00944c 1123 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1124 rt->dst.next = icmp6_dst_gc_list;
1125 icmp6_dst_gc_list = &rt->dst;
3b00944c 1126 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1127
5578689a 1128 fib6_force_start_gc(net);
1da177e4 1129
87a11578
DM
1130 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1131
1da177e4 1132out:
87a11578 1133 return dst;
1da177e4
LT
1134}
1135
3d0f24a7 1136int icmp6_dst_gc(void)
1da177e4 1137{
e9476e95 1138 struct dst_entry *dst, **pprev;
3d0f24a7 1139 int more = 0;
1da177e4 1140
3b00944c
YH
1141 spin_lock_bh(&icmp6_dst_lock);
1142 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1143
1da177e4
LT
1144 while ((dst = *pprev) != NULL) {
1145 if (!atomic_read(&dst->__refcnt)) {
1146 *pprev = dst->next;
1147 dst_free(dst);
1da177e4
LT
1148 } else {
1149 pprev = &dst->next;
3d0f24a7 1150 ++more;
1da177e4
LT
1151 }
1152 }
1153
3b00944c 1154 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1155
3d0f24a7 1156 return more;
1da177e4
LT
1157}
1158
1e493d19
DM
1159static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1160 void *arg)
1161{
1162 struct dst_entry *dst, **pprev;
1163
1164 spin_lock_bh(&icmp6_dst_lock);
1165 pprev = &icmp6_dst_gc_list;
1166 while ((dst = *pprev) != NULL) {
1167 struct rt6_info *rt = (struct rt6_info *) dst;
1168 if (func(rt, arg)) {
1169 *pprev = dst->next;
1170 dst_free(dst);
1171 } else {
1172 pprev = &dst->next;
1173 }
1174 }
1175 spin_unlock_bh(&icmp6_dst_lock);
1176}
1177
569d3645 1178static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1179{
1da177e4 1180 unsigned long now = jiffies;
86393e52 1181 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1182 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1183 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1184 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1185 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1186 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1187 int entries;
7019b78e 1188
fc66f95c 1189 entries = dst_entries_get_fast(ops);
7019b78e 1190 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1191 entries <= rt_max_size)
1da177e4
LT
1192 goto out;
1193
6891a346
BT
1194 net->ipv6.ip6_rt_gc_expire++;
1195 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1196 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1197 entries = dst_entries_get_slow(ops);
1198 if (entries < ops->gc_thresh)
7019b78e 1199 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1200out:
7019b78e 1201 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1202 return entries > rt_max_size;
1da177e4
LT
1203}
1204
1205/* Clean host part of a prefix. Not necessary in radix tree,
1206 but results in cleaner routing tables.
1207
1208 Remove it only when all the things will work!
1209 */
1210
6b75d090 1211int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1212{
5170ae82 1213 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1214 if (hoplimit == 0) {
6b75d090 1215 struct net_device *dev = dst->dev;
c68f24cc
ED
1216 struct inet6_dev *idev;
1217
1218 rcu_read_lock();
1219 idev = __in6_dev_get(dev);
1220 if (idev)
6b75d090 1221 hoplimit = idev->cnf.hop_limit;
c68f24cc 1222 else
53b7997f 1223 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1224 rcu_read_unlock();
1da177e4
LT
1225 }
1226 return hoplimit;
1227}
abbf46ae 1228EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1229
1230/*
1231 *
1232 */
1233
86872cb5 1234int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1235{
1236 int err;
5578689a 1237 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1238 struct rt6_info *rt = NULL;
1239 struct net_device *dev = NULL;
1240 struct inet6_dev *idev = NULL;
c71099ac 1241 struct fib6_table *table;
1da177e4
LT
1242 int addr_type;
1243
86872cb5 1244 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1245 return -EINVAL;
1246#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1247 if (cfg->fc_src_len)
1da177e4
LT
1248 return -EINVAL;
1249#endif
86872cb5 1250 if (cfg->fc_ifindex) {
1da177e4 1251 err = -ENODEV;
5578689a 1252 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1253 if (!dev)
1254 goto out;
1255 idev = in6_dev_get(dev);
1256 if (!idev)
1257 goto out;
1258 }
1259
86872cb5
TG
1260 if (cfg->fc_metric == 0)
1261 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1262
d71314b4 1263 err = -ENOBUFS;
38308473
DM
1264 if (cfg->fc_nlinfo.nlh &&
1265 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1266 table = fib6_get_table(net, cfg->fc_table);
38308473 1267 if (!table) {
d71314b4
MV
1268 printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
1269 table = fib6_new_table(net, cfg->fc_table);
1270 }
1271 } else {
1272 table = fib6_new_table(net, cfg->fc_table);
1273 }
38308473
DM
1274
1275 if (!table)
c71099ac 1276 goto out;
c71099ac 1277
957c665f 1278 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1da177e4 1279
38308473 1280 if (!rt) {
1da177e4
LT
1281 err = -ENOMEM;
1282 goto out;
1283 }
1284
d8d1f30b 1285 rt->dst.obsolete = -1;
d1918542 1286 rt->dst.expires = (cfg->fc_flags & RTF_EXPIRES) ?
6f704992
YH
1287 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1288 0;
1da177e4 1289
86872cb5
TG
1290 if (cfg->fc_protocol == RTPROT_UNSPEC)
1291 cfg->fc_protocol = RTPROT_BOOT;
1292 rt->rt6i_protocol = cfg->fc_protocol;
1293
1294 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1295
1296 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1297 rt->dst.input = ip6_mc_input;
ab79ad14
1298 else if (cfg->fc_flags & RTF_LOCAL)
1299 rt->dst.input = ip6_input;
1da177e4 1300 else
d8d1f30b 1301 rt->dst.input = ip6_forward;
1da177e4 1302
d8d1f30b 1303 rt->dst.output = ip6_output;
1da177e4 1304
86872cb5
TG
1305 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1306 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1307 if (rt->rt6i_dst.plen == 128)
11d53b49 1308 rt->dst.flags |= DST_HOST;
1da177e4 1309
8e2ec639
YZ
1310 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1311 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1312 if (!metrics) {
1313 err = -ENOMEM;
1314 goto out;
1315 }
1316 dst_init_metrics(&rt->dst, metrics, 0);
1317 }
1da177e4 1318#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1319 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1320 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1321#endif
1322
86872cb5 1323 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1324
1325 /* We cannot add true routes via loopback here,
1326 they would result in kernel looping; promote them to reject routes
1327 */
86872cb5 1328 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1329 (dev && (dev->flags & IFF_LOOPBACK) &&
1330 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1331 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1332 /* hold loopback dev/idev if we haven't done so. */
5578689a 1333 if (dev != net->loopback_dev) {
1da177e4
LT
1334 if (dev) {
1335 dev_put(dev);
1336 in6_dev_put(idev);
1337 }
5578689a 1338 dev = net->loopback_dev;
1da177e4
LT
1339 dev_hold(dev);
1340 idev = in6_dev_get(dev);
1341 if (!idev) {
1342 err = -ENODEV;
1343 goto out;
1344 }
1345 }
d8d1f30b
CG
1346 rt->dst.output = ip6_pkt_discard_out;
1347 rt->dst.input = ip6_pkt_discard;
1348 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1349 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1350 goto install_route;
1351 }
1352
86872cb5 1353 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1354 const struct in6_addr *gw_addr;
1da177e4
LT
1355 int gwa_type;
1356
86872cb5 1357 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1358 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1359 gwa_type = ipv6_addr_type(gw_addr);
1360
1361 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1362 struct rt6_info *grt;
1363
1364 /* IPv6 strictly inhibits using not link-local
1365 addresses as nexthop address.
1366 Otherwise, router will not able to send redirects.
1367 It is very good, but in some (rare!) circumstances
1368 (SIT, PtP, NBMA NOARP links) it is handy to allow
1369 some exceptions. --ANK
1370 */
1371 err = -EINVAL;
38308473 1372 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1373 goto out;
1374
5578689a 1375 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1376
1377 err = -EHOSTUNREACH;
38308473 1378 if (!grt)
1da177e4
LT
1379 goto out;
1380 if (dev) {
d1918542 1381 if (dev != grt->dst.dev) {
d8d1f30b 1382 dst_release(&grt->dst);
1da177e4
LT
1383 goto out;
1384 }
1385 } else {
d1918542 1386 dev = grt->dst.dev;
1da177e4
LT
1387 idev = grt->rt6i_idev;
1388 dev_hold(dev);
1389 in6_dev_hold(grt->rt6i_idev);
1390 }
38308473 1391 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1392 err = 0;
d8d1f30b 1393 dst_release(&grt->dst);
1da177e4
LT
1394
1395 if (err)
1396 goto out;
1397 }
1398 err = -EINVAL;
38308473 1399 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1400 goto out;
1401 }
1402
1403 err = -ENODEV;
38308473 1404 if (!dev)
1da177e4
LT
1405 goto out;
1406
c3968a85
DW
1407 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1408 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1409 err = -EINVAL;
1410 goto out;
1411 }
4e3fd7a0 1412 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1413 rt->rt6i_prefsrc.plen = 128;
1414 } else
1415 rt->rt6i_prefsrc.plen = 0;
1416
86872cb5 1417 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
8ade06c6 1418 err = rt6_bind_neighbour(rt, dev);
f83c7790 1419 if (err)
1da177e4 1420 goto out;
1da177e4
LT
1421 }
1422
86872cb5 1423 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1424
1425install_route:
86872cb5
TG
1426 if (cfg->fc_mx) {
1427 struct nlattr *nla;
1428 int remaining;
1429
1430 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1431 int type = nla_type(nla);
86872cb5
TG
1432
1433 if (type) {
1434 if (type > RTAX_MAX) {
1da177e4
LT
1435 err = -EINVAL;
1436 goto out;
1437 }
86872cb5 1438
defb3519 1439 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1440 }
1da177e4
LT
1441 }
1442 }
1443
d8d1f30b 1444 rt->dst.dev = dev;
1da177e4 1445 rt->rt6i_idev = idev;
c71099ac 1446 rt->rt6i_table = table;
63152fc0 1447
c346dca1 1448 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1449
86872cb5 1450 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1451
1452out:
1453 if (dev)
1454 dev_put(dev);
1455 if (idev)
1456 in6_dev_put(idev);
1457 if (rt)
d8d1f30b 1458 dst_free(&rt->dst);
1da177e4
LT
1459 return err;
1460}
1461
86872cb5 1462static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1463{
1464 int err;
c71099ac 1465 struct fib6_table *table;
d1918542 1466 struct net *net = dev_net(rt->dst.dev);
1da177e4 1467
8ed67789 1468 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1469 return -ENOENT;
1470
c71099ac
TG
1471 table = rt->rt6i_table;
1472 write_lock_bh(&table->tb6_lock);
1da177e4 1473
86872cb5 1474 err = fib6_del(rt, info);
d8d1f30b 1475 dst_release(&rt->dst);
1da177e4 1476
c71099ac 1477 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1478
1479 return err;
1480}
1481
e0a1ad73
TG
1482int ip6_del_rt(struct rt6_info *rt)
1483{
4d1169c1 1484 struct nl_info info = {
d1918542 1485 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1486 };
528c4ceb 1487 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1488}
1489
86872cb5 1490static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1491{
c71099ac 1492 struct fib6_table *table;
1da177e4
LT
1493 struct fib6_node *fn;
1494 struct rt6_info *rt;
1495 int err = -ESRCH;
1496
5578689a 1497 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1498 if (!table)
c71099ac
TG
1499 return err;
1500
1501 read_lock_bh(&table->tb6_lock);
1da177e4 1502
c71099ac 1503 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1504 &cfg->fc_dst, cfg->fc_dst_len,
1505 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1506
1da177e4 1507 if (fn) {
d8d1f30b 1508 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1509 if (cfg->fc_ifindex &&
d1918542
DM
1510 (!rt->dst.dev ||
1511 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1512 continue;
86872cb5
TG
1513 if (cfg->fc_flags & RTF_GATEWAY &&
1514 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1515 continue;
86872cb5 1516 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1517 continue;
d8d1f30b 1518 dst_hold(&rt->dst);
c71099ac 1519 read_unlock_bh(&table->tb6_lock);
1da177e4 1520
86872cb5 1521 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1522 }
1523 }
c71099ac 1524 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1525
1526 return err;
1527}
1528
1529/*
1530 * Handle redirects
1531 */
a6279458 1532struct ip6rd_flowi {
4c9483b2 1533 struct flowi6 fl6;
a6279458
YH
1534 struct in6_addr gateway;
1535};
1536
8ed67789
DL
1537static struct rt6_info *__ip6_route_redirect(struct net *net,
1538 struct fib6_table *table,
4c9483b2 1539 struct flowi6 *fl6,
a6279458 1540 int flags)
1da177e4 1541{
4c9483b2 1542 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1543 struct rt6_info *rt;
e843b9e1 1544 struct fib6_node *fn;
c71099ac 1545
1da177e4 1546 /*
e843b9e1
YH
1547 * Get the "current" route for this destination and
1548 * check if the redirect has come from approriate router.
1549 *
1550 * RFC 2461 specifies that redirects should only be
1551 * accepted if they come from the nexthop to the target.
1552 * Due to the way the routes are chosen, this notion
1553 * is a bit fuzzy and one might need to check all possible
1554 * routes.
1da177e4 1555 */
1da177e4 1556
c71099ac 1557 read_lock_bh(&table->tb6_lock);
4c9483b2 1558 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1559restart:
d8d1f30b 1560 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1561 /*
1562 * Current route is on-link; redirect is always invalid.
1563 *
1564 * Seems, previous statement is not true. It could
1565 * be node, which looks for us as on-link (f.e. proxy ndisc)
1566 * But then router serving it might decide, that we should
1567 * know truth 8)8) --ANK (980726).
1568 */
1569 if (rt6_check_expired(rt))
1570 continue;
1571 if (!(rt->rt6i_flags & RTF_GATEWAY))
1572 continue;
d1918542 1573 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
e843b9e1 1574 continue;
a6279458 1575 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1576 continue;
1577 break;
1578 }
a6279458 1579
cb15d9c2 1580 if (!rt)
8ed67789 1581 rt = net->ipv6.ip6_null_entry;
4c9483b2 1582 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1583out:
d8d1f30b 1584 dst_hold(&rt->dst);
a6279458 1585
c71099ac 1586 read_unlock_bh(&table->tb6_lock);
e843b9e1 1587
a6279458
YH
1588 return rt;
1589};
1590
b71d1d42
ED
1591static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1592 const struct in6_addr *src,
1593 const struct in6_addr *gateway,
a6279458
YH
1594 struct net_device *dev)
1595{
adaa70bb 1596 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1597 struct net *net = dev_net(dev);
a6279458 1598 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1599 .fl6 = {
1600 .flowi6_oif = dev->ifindex,
1601 .daddr = *dest,
1602 .saddr = *src,
a6279458 1603 },
a6279458 1604 };
adaa70bb 1605
4e3fd7a0 1606 rdfl.gateway = *gateway;
86c36ce4 1607
adaa70bb
TG
1608 if (rt6_need_strict(dest))
1609 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1610
4c9483b2 1611 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1612 flags, __ip6_route_redirect);
a6279458
YH
1613}
1614
b71d1d42
ED
1615void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1616 const struct in6_addr *saddr,
a6279458
YH
1617 struct neighbour *neigh, u8 *lladdr, int on_link)
1618{
1619 struct rt6_info *rt, *nrt = NULL;
1620 struct netevent_redirect netevent;
c346dca1 1621 struct net *net = dev_net(neigh->dev);
a6279458
YH
1622
1623 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1624
8ed67789 1625 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1626 if (net_ratelimit())
1627 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1628 "for redirect target\n");
a6279458 1629 goto out;
1da177e4
LT
1630 }
1631
1da177e4
LT
1632 /*
1633 * We have finally decided to accept it.
1634 */
1635
1ab1457c 1636 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1637 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1638 NEIGH_UPDATE_F_OVERRIDE|
1639 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1640 NEIGH_UPDATE_F_ISROUTER))
1641 );
1642
1643 /*
1644 * Redirect received -> path was valid.
1645 * Look, redirects are sent only in response to data packets,
1646 * so that this nexthop apparently is reachable. --ANK
1647 */
d8d1f30b 1648 dst_confirm(&rt->dst);
1da177e4
LT
1649
1650 /* Duplicate redirect: silently ignore. */
27217455 1651 if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1da177e4
LT
1652 goto out;
1653
21efcfa0 1654 nrt = ip6_rt_copy(rt, dest);
38308473 1655 if (!nrt)
1da177e4
LT
1656 goto out;
1657
1658 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1659 if (on_link)
1660 nrt->rt6i_flags &= ~RTF_GATEWAY;
1661
4e3fd7a0 1662 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
69cce1d1 1663 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1da177e4 1664
40e22e8f 1665 if (ip6_ins_rt(nrt))
1da177e4
LT
1666 goto out;
1667
d8d1f30b
CG
1668 netevent.old = &rt->dst;
1669 netevent.new = &nrt->dst;
8d71740c
TT
1670 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1671
38308473 1672 if (rt->rt6i_flags & RTF_CACHE) {
e0a1ad73 1673 ip6_del_rt(rt);
1da177e4
LT
1674 return;
1675 }
1676
1677out:
d8d1f30b 1678 dst_release(&rt->dst);
1da177e4
LT
1679}
1680
1681/*
1682 * Handle ICMP "packet too big" messages
1683 * i.e. Path MTU discovery
1684 */
1685
b71d1d42 1686static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2 1687 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1688{
1689 struct rt6_info *rt, *nrt;
1690 int allfrag = 0;
d3052b55 1691again:
ae878ae2 1692 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
38308473 1693 if (!rt)
1da177e4
LT
1694 return;
1695
d3052b55
AV
1696 if (rt6_check_expired(rt)) {
1697 ip6_del_rt(rt);
1698 goto again;
1699 }
1700
d8d1f30b 1701 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1702 goto out;
1703
1704 if (pmtu < IPV6_MIN_MTU) {
1705 /*
1ab1457c 1706 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1707 * MTU (1280) and a fragment header should always be included
1708 * after a node receiving Too Big message reporting PMTU is
1709 * less than the IPv6 Minimum Link MTU.
1710 */
1711 pmtu = IPV6_MIN_MTU;
1712 allfrag = 1;
1713 }
1714
1715 /* New mtu received -> path was valid.
1716 They are sent only in response to data packets,
1717 so that this nexthop apparently is reachable. --ANK
1718 */
d8d1f30b 1719 dst_confirm(&rt->dst);
1da177e4
LT
1720
1721 /* Host route. If it is static, it would be better
1722 not to override it, but add new one, so that
1723 when cache entry will expire old pmtu
1724 would return automatically.
1725 */
1726 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1727 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1728 if (allfrag) {
1729 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1730 features |= RTAX_FEATURE_ALLFRAG;
1731 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1732 }
d8d1f30b 1733 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1734 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1735 goto out;
1736 }
1737
1738 /* Network route.
1739 Two cases are possible:
1740 1. It is connected route. Action: COW
1741 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1742 */
27217455 1743 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1744 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1745 else
1746 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1747
d5315b50 1748 if (nrt) {
defb3519
DM
1749 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1750 if (allfrag) {
1751 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1752 features |= RTAX_FEATURE_ALLFRAG;
1753 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1754 }
a1e78363
YH
1755
1756 /* According to RFC 1981, detecting PMTU increase shouldn't be
1757 * happened within 5 mins, the recommended timer is 10 mins.
1758 * Here this route expiration time is set to ip6_rt_mtu_expires
1759 * which is 10 mins. After 10 mins the decreased pmtu is expired
1760 * and detecting PMTU increase will be automatically happened.
1761 */
d8d1f30b 1762 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1763 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1764
40e22e8f 1765 ip6_ins_rt(nrt);
1da177e4 1766 }
1da177e4 1767out:
d8d1f30b 1768 dst_release(&rt->dst);
1da177e4
LT
1769}
1770
b71d1d42 1771void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2
1772 struct net_device *dev, u32 pmtu)
1773{
1774 struct net *net = dev_net(dev);
1775
1776 /*
1777 * RFC 1981 states that a node "MUST reduce the size of the packets it
1778 * is sending along the path" that caused the Packet Too Big message.
1779 * Since it's not possible in the general case to determine which
1780 * interface was used to send the original packet, we update the MTU
1781 * on the interface that will be used to send future packets. We also
1782 * update the MTU on the interface that received the Packet Too Big in
1783 * case the original packet was forced out that interface with
1784 * SO_BINDTODEVICE or similar. This is the next best thing to the
1785 * correct behaviour, which would be to update the MTU on all
1786 * interfaces.
1787 */
1788 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1789 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1790}
1791
1da177e4
LT
1792/*
1793 * Misc support functions
1794 */
1795
21efcfa0
ED
1796static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1797 const struct in6_addr *dest)
1da177e4 1798{
d1918542 1799 struct net *net = dev_net(ort->dst.dev);
5c1e6aa3 1800 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 1801 ort->dst.dev, 0);
1da177e4
LT
1802
1803 if (rt) {
d8d1f30b
CG
1804 rt->dst.input = ort->dst.input;
1805 rt->dst.output = ort->dst.output;
8e2ec639 1806 rt->dst.flags |= DST_HOST;
d8d1f30b 1807
4e3fd7a0 1808 rt->rt6i_dst.addr = *dest;
8e2ec639 1809 rt->rt6i_dst.plen = 128;
defb3519 1810 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1811 rt->dst.error = ort->dst.error;
1da177e4
LT
1812 rt->rt6i_idev = ort->rt6i_idev;
1813 if (rt->rt6i_idev)
1814 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1815 rt->dst.lastuse = jiffies;
d1918542 1816 rt->dst.expires = 0;
1da177e4 1817
4e3fd7a0 1818 rt->rt6i_gateway = ort->rt6i_gateway;
1da177e4
LT
1819 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1820 rt->rt6i_metric = 0;
1821
1da177e4
LT
1822#ifdef CONFIG_IPV6_SUBTREES
1823 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1824#endif
0f6c6392 1825 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1826 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1827 }
1828 return rt;
1829}
1830
70ceb4f5 1831#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1832static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1833 const struct in6_addr *prefix, int prefixlen,
1834 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1835{
1836 struct fib6_node *fn;
1837 struct rt6_info *rt = NULL;
c71099ac
TG
1838 struct fib6_table *table;
1839
efa2cea0 1840 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1841 if (!table)
c71099ac 1842 return NULL;
70ceb4f5 1843
c71099ac
TG
1844 write_lock_bh(&table->tb6_lock);
1845 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1846 if (!fn)
1847 goto out;
1848
d8d1f30b 1849 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1850 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1851 continue;
1852 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1853 continue;
1854 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1855 continue;
d8d1f30b 1856 dst_hold(&rt->dst);
70ceb4f5
YH
1857 break;
1858 }
1859out:
c71099ac 1860 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1861 return rt;
1862}
1863
efa2cea0 1864static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1865 const struct in6_addr *prefix, int prefixlen,
1866 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5
YH
1867 unsigned pref)
1868{
86872cb5
TG
1869 struct fib6_config cfg = {
1870 .fc_table = RT6_TABLE_INFO,
238fc7ea 1871 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1872 .fc_ifindex = ifindex,
1873 .fc_dst_len = prefixlen,
1874 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1875 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1876 .fc_nlinfo.pid = 0,
1877 .fc_nlinfo.nlh = NULL,
1878 .fc_nlinfo.nl_net = net,
86872cb5
TG
1879 };
1880
4e3fd7a0
AD
1881 cfg.fc_dst = *prefix;
1882 cfg.fc_gateway = *gwaddr;
70ceb4f5 1883
e317da96
YH
1884 /* We should treat it as a default route if prefix length is 0. */
1885 if (!prefixlen)
86872cb5 1886 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1887
86872cb5 1888 ip6_route_add(&cfg);
70ceb4f5 1889
efa2cea0 1890 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1891}
1892#endif
1893
b71d1d42 1894struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1895{
1da177e4 1896 struct rt6_info *rt;
c71099ac 1897 struct fib6_table *table;
1da177e4 1898
c346dca1 1899 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1900 if (!table)
c71099ac 1901 return NULL;
1da177e4 1902
c71099ac 1903 write_lock_bh(&table->tb6_lock);
d8d1f30b 1904 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
d1918542 1905 if (dev == rt->dst.dev &&
045927ff 1906 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1907 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1908 break;
1909 }
1910 if (rt)
d8d1f30b 1911 dst_hold(&rt->dst);
c71099ac 1912 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1913 return rt;
1914}
1915
b71d1d42 1916struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1917 struct net_device *dev,
1918 unsigned int pref)
1da177e4 1919{
86872cb5
TG
1920 struct fib6_config cfg = {
1921 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1922 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1923 .fc_ifindex = dev->ifindex,
1924 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1925 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1926 .fc_nlinfo.pid = 0,
1927 .fc_nlinfo.nlh = NULL,
c346dca1 1928 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1929 };
1da177e4 1930
4e3fd7a0 1931 cfg.fc_gateway = *gwaddr;
1da177e4 1932
86872cb5 1933 ip6_route_add(&cfg);
1da177e4 1934
1da177e4
LT
1935 return rt6_get_dflt_router(gwaddr, dev);
1936}
1937
7b4da532 1938void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1939{
1940 struct rt6_info *rt;
c71099ac
TG
1941 struct fib6_table *table;
1942
1943 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1944 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1945 if (!table)
c71099ac 1946 return;
1da177e4
LT
1947
1948restart:
c71099ac 1949 read_lock_bh(&table->tb6_lock);
d8d1f30b 1950 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1951 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1952 dst_hold(&rt->dst);
c71099ac 1953 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1954 ip6_del_rt(rt);
1da177e4
LT
1955 goto restart;
1956 }
1957 }
c71099ac 1958 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1959}
1960
5578689a
DL
1961static void rtmsg_to_fib6_config(struct net *net,
1962 struct in6_rtmsg *rtmsg,
86872cb5
TG
1963 struct fib6_config *cfg)
1964{
1965 memset(cfg, 0, sizeof(*cfg));
1966
1967 cfg->fc_table = RT6_TABLE_MAIN;
1968 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1969 cfg->fc_metric = rtmsg->rtmsg_metric;
1970 cfg->fc_expires = rtmsg->rtmsg_info;
1971 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1972 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1973 cfg->fc_flags = rtmsg->rtmsg_flags;
1974
5578689a 1975 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1976
4e3fd7a0
AD
1977 cfg->fc_dst = rtmsg->rtmsg_dst;
1978 cfg->fc_src = rtmsg->rtmsg_src;
1979 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
1980}
1981
5578689a 1982int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1983{
86872cb5 1984 struct fib6_config cfg;
1da177e4
LT
1985 struct in6_rtmsg rtmsg;
1986 int err;
1987
1988 switch(cmd) {
1989 case SIOCADDRT: /* Add a route */
1990 case SIOCDELRT: /* Delete a route */
1991 if (!capable(CAP_NET_ADMIN))
1992 return -EPERM;
1993 err = copy_from_user(&rtmsg, arg,
1994 sizeof(struct in6_rtmsg));
1995 if (err)
1996 return -EFAULT;
86872cb5 1997
5578689a 1998 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1999
1da177e4
LT
2000 rtnl_lock();
2001 switch (cmd) {
2002 case SIOCADDRT:
86872cb5 2003 err = ip6_route_add(&cfg);
1da177e4
LT
2004 break;
2005 case SIOCDELRT:
86872cb5 2006 err = ip6_route_del(&cfg);
1da177e4
LT
2007 break;
2008 default:
2009 err = -EINVAL;
2010 }
2011 rtnl_unlock();
2012
2013 return err;
3ff50b79 2014 }
1da177e4
LT
2015
2016 return -EINVAL;
2017}
2018
2019/*
2020 * Drop the packet on the floor
2021 */
2022
d5fdd6ba 2023static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2024{
612f09e8 2025 int type;
adf30907 2026 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2027 switch (ipstats_mib_noroutes) {
2028 case IPSTATS_MIB_INNOROUTES:
0660e03f 2029 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2030 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2031 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2032 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2033 break;
2034 }
2035 /* FALLTHROUGH */
2036 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2037 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2038 ipstats_mib_noroutes);
612f09e8
YH
2039 break;
2040 }
3ffe533c 2041 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2042 kfree_skb(skb);
2043 return 0;
2044}
2045
9ce8ade0
TG
2046static int ip6_pkt_discard(struct sk_buff *skb)
2047{
612f09e8 2048 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2049}
2050
20380731 2051static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2052{
adf30907 2053 skb->dev = skb_dst(skb)->dev;
612f09e8 2054 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2055}
2056
6723ab54
DM
2057#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2058
9ce8ade0
TG
2059static int ip6_pkt_prohibit(struct sk_buff *skb)
2060{
612f09e8 2061 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2062}
2063
2064static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2065{
adf30907 2066 skb->dev = skb_dst(skb)->dev;
612f09e8 2067 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2068}
2069
6723ab54
DM
2070#endif
2071
1da177e4
LT
2072/*
2073 * Allocate a dst for local (unicast / anycast) address.
2074 */
2075
2076struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2077 const struct in6_addr *addr,
8f031519 2078 bool anycast)
1da177e4 2079{
c346dca1 2080 struct net *net = dev_net(idev->dev);
5c1e6aa3 2081 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 2082 net->loopback_dev, 0);
f83c7790 2083 int err;
1da177e4 2084
38308473 2085 if (!rt) {
40385653
BG
2086 if (net_ratelimit())
2087 pr_warning("IPv6: Maximum number of routes reached,"
2088 " consider increasing route/max_size.\n");
1da177e4 2089 return ERR_PTR(-ENOMEM);
40385653 2090 }
1da177e4 2091
1da177e4
LT
2092 in6_dev_hold(idev);
2093
11d53b49 2094 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2095 rt->dst.input = ip6_input;
2096 rt->dst.output = ip6_output;
1da177e4 2097 rt->rt6i_idev = idev;
d8d1f30b 2098 rt->dst.obsolete = -1;
1da177e4
LT
2099
2100 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2101 if (anycast)
2102 rt->rt6i_flags |= RTF_ANYCAST;
2103 else
1da177e4 2104 rt->rt6i_flags |= RTF_LOCAL;
8ade06c6 2105 err = rt6_bind_neighbour(rt, rt->dst.dev);
f83c7790 2106 if (err) {
d8d1f30b 2107 dst_free(&rt->dst);
f83c7790 2108 return ERR_PTR(err);
1da177e4
LT
2109 }
2110
4e3fd7a0 2111 rt->rt6i_dst.addr = *addr;
1da177e4 2112 rt->rt6i_dst.plen = 128;
5578689a 2113 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2114
d8d1f30b 2115 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2116
2117 return rt;
2118}
2119
c3968a85
DW
2120int ip6_route_get_saddr(struct net *net,
2121 struct rt6_info *rt,
b71d1d42 2122 const struct in6_addr *daddr,
c3968a85
DW
2123 unsigned int prefs,
2124 struct in6_addr *saddr)
2125{
2126 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2127 int err = 0;
2128 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2129 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2130 else
2131 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2132 daddr, prefs, saddr);
2133 return err;
2134}
2135
2136/* remove deleted ip from prefsrc entries */
/* Walker argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device	*dev;	/* device filter; NULL matches any */
	struct net		*net;	/* namespace whose null entry is skipped */
	struct in6_addr		*addr;	/* address being removed */
};
2142
2143static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2144{
2145 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2146 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2147 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2148
d1918542 2149 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2150 rt != net->ipv6.ip6_null_entry &&
2151 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2152 /* remove prefsrc entry */
2153 rt->rt6i_prefsrc.plen = 0;
2154 }
2155 return 0;
2156}
2157
2158void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2159{
2160 struct net *net = dev_net(ifp->idev->dev);
2161 struct arg_dev_net_ip adni = {
2162 .dev = ifp->idev->dev,
2163 .net = net,
2164 .addr = &ifp->addr,
2165 };
2166 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2167}
2168
8ed67789
DL
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device	*dev;	/* device filter; NULL matches any */
	struct net		*net;	/* namespace whose null entry is kept */
};
2173
1da177e4
LT
2174static int fib6_ifdown(struct rt6_info *rt, void *arg)
2175{
bc3ef660 2176 const struct arg_dev_net *adn = arg;
2177 const struct net_device *dev = adn->dev;
8ed67789 2178
d1918542 2179 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2180 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2181 return -1;
c159d30c 2182
1da177e4
LT
2183 return 0;
2184}
2185
f3db4851 2186void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2187{
8ed67789
DL
2188 struct arg_dev_net adn = {
2189 .dev = dev,
2190 .net = net,
2191 };
2192
2193 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2194 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2195}
2196
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned		mtu;	/* the new MTU */
};
2202
2203static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2204{
2205 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2206 struct inet6_dev *idev;
2207
2208 /* In IPv6 pmtu discovery is not optional,
2209 so that RTAX_MTU lock cannot disable it.
2210 We still use this lock to block changes
2211 caused by addrconf/ndisc.
2212 */
2213
2214 idev = __in6_dev_get(arg->dev);
38308473 2215 if (!idev)
1da177e4
LT
2216 return 0;
2217
2218 /* For administrative MTU increase, there is no way to discover
2219 IPv6 PMTU increase, so PMTU increase should be updated here.
2220 Since RFC 1981 doesn't include administrative MTU increase
2221 update PMTU increase is a MUST. (i.e. jumbo frame)
2222 */
2223 /*
2224 If new MTU is less than route PMTU, this new MTU will be the
2225 lowest MTU in the path, update the route PMTU to reflect PMTU
2226 decreases; if new MTU is greater than route PMTU, and the
2227 old MTU is the lowest MTU in the path, update the route PMTU
2228 to reflect the increase. In this case if the other nodes' MTU
2229 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2230 PMTU discouvery.
2231 */
d1918542 2232 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2233 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2234 (dst_mtu(&rt->dst) >= arg->mtu ||
2235 (dst_mtu(&rt->dst) < arg->mtu &&
2236 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2237 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2238 }
1da177e4
LT
2239 return 0;
2240}
2241
2242void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2243{
c71099ac
TG
2244 struct rt6_mtu_change_arg arg = {
2245 .dev = dev,
2246 .mtu = mtu,
2247 };
1da177e4 2248
c346dca1 2249 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2250}
2251
ef7c79ed 2252static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2253 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2254 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2255 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2256 [RTA_PRIORITY] = { .type = NLA_U32 },
2257 [RTA_METRICS] = { .type = NLA_NESTED },
2258};
2259
2260static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2261 struct fib6_config *cfg)
1da177e4 2262{
86872cb5
TG
2263 struct rtmsg *rtm;
2264 struct nlattr *tb[RTA_MAX+1];
2265 int err;
1da177e4 2266
86872cb5
TG
2267 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2268 if (err < 0)
2269 goto errout;
1da177e4 2270
86872cb5
TG
2271 err = -EINVAL;
2272 rtm = nlmsg_data(nlh);
2273 memset(cfg, 0, sizeof(*cfg));
2274
2275 cfg->fc_table = rtm->rtm_table;
2276 cfg->fc_dst_len = rtm->rtm_dst_len;
2277 cfg->fc_src_len = rtm->rtm_src_len;
2278 cfg->fc_flags = RTF_UP;
2279 cfg->fc_protocol = rtm->rtm_protocol;
2280
2281 if (rtm->rtm_type == RTN_UNREACHABLE)
2282 cfg->fc_flags |= RTF_REJECT;
2283
ab79ad14
2284 if (rtm->rtm_type == RTN_LOCAL)
2285 cfg->fc_flags |= RTF_LOCAL;
2286
86872cb5
TG
2287 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2288 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2289 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2290
2291 if (tb[RTA_GATEWAY]) {
2292 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2293 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2294 }
86872cb5
TG
2295
2296 if (tb[RTA_DST]) {
2297 int plen = (rtm->rtm_dst_len + 7) >> 3;
2298
2299 if (nla_len(tb[RTA_DST]) < plen)
2300 goto errout;
2301
2302 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2303 }
86872cb5
TG
2304
2305 if (tb[RTA_SRC]) {
2306 int plen = (rtm->rtm_src_len + 7) >> 3;
2307
2308 if (nla_len(tb[RTA_SRC]) < plen)
2309 goto errout;
2310
2311 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2312 }
86872cb5 2313
c3968a85
DW
2314 if (tb[RTA_PREFSRC])
2315 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2316
86872cb5
TG
2317 if (tb[RTA_OIF])
2318 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2319
2320 if (tb[RTA_PRIORITY])
2321 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2322
2323 if (tb[RTA_METRICS]) {
2324 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2325 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2326 }
86872cb5
TG
2327
2328 if (tb[RTA_TABLE])
2329 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2330
2331 err = 0;
2332errout:
2333 return err;
1da177e4
LT
2334}
2335
c127ea2c 2336static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2337{
86872cb5
TG
2338 struct fib6_config cfg;
2339 int err;
1da177e4 2340
86872cb5
TG
2341 err = rtm_to_fib6_config(skb, nlh, &cfg);
2342 if (err < 0)
2343 return err;
2344
2345 return ip6_route_del(&cfg);
1da177e4
LT
2346}
2347
c127ea2c 2348static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2349{
86872cb5
TG
2350 struct fib6_config cfg;
2351 int err;
1da177e4 2352
86872cb5
TG
2353 err = rtm_to_fib6_config(skb, nlh, &cfg);
2354 if (err < 0)
2355 return err;
2356
2357 return ip6_route_add(&cfg);
1da177e4
LT
2358}
2359
339bf98f
TG
2360static inline size_t rt6_nlmsg_size(void)
2361{
2362 return NLMSG_ALIGN(sizeof(struct rtmsg))
2363 + nla_total_size(16) /* RTA_SRC */
2364 + nla_total_size(16) /* RTA_DST */
2365 + nla_total_size(16) /* RTA_GATEWAY */
2366 + nla_total_size(16) /* RTA_PREFSRC */
2367 + nla_total_size(4) /* RTA_TABLE */
2368 + nla_total_size(4) /* RTA_IIF */
2369 + nla_total_size(4) /* RTA_OIF */
2370 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2371 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2372 + nla_total_size(sizeof(struct rta_cacheinfo));
2373}
2374
191cd582
BH
2375static int rt6_fill_node(struct net *net,
2376 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2377 struct in6_addr *dst, struct in6_addr *src,
2378 int iif, int type, u32 pid, u32 seq,
7bc570c8 2379 int prefix, int nowait, unsigned int flags)
1da177e4 2380{
346f870b 2381 const struct inet_peer *peer;
1da177e4 2382 struct rtmsg *rtm;
2d7202bf 2383 struct nlmsghdr *nlh;
e3703b3d 2384 long expires;
9e762a4a 2385 u32 table;
f2c31e32 2386 struct neighbour *n;
346f870b 2387 u32 ts, tsage;
1da177e4
LT
2388
2389 if (prefix) { /* user wants prefix routes only */
2390 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2391 /* success since this is not a prefix route */
2392 return 1;
2393 }
2394 }
2395
2d7202bf 2396 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
38308473 2397 if (!nlh)
26932566 2398 return -EMSGSIZE;
2d7202bf
TG
2399
2400 rtm = nlmsg_data(nlh);
1da177e4
LT
2401 rtm->rtm_family = AF_INET6;
2402 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2403 rtm->rtm_src_len = rt->rt6i_src.plen;
2404 rtm->rtm_tos = 0;
c71099ac 2405 if (rt->rt6i_table)
9e762a4a 2406 table = rt->rt6i_table->tb6_id;
c71099ac 2407 else
9e762a4a
PM
2408 table = RT6_TABLE_UNSPEC;
2409 rtm->rtm_table = table;
2d7202bf 2410 NLA_PUT_U32(skb, RTA_TABLE, table);
38308473 2411 if (rt->rt6i_flags & RTF_REJECT)
1da177e4 2412 rtm->rtm_type = RTN_UNREACHABLE;
38308473 2413 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2414 rtm->rtm_type = RTN_LOCAL;
d1918542 2415 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2416 rtm->rtm_type = RTN_LOCAL;
2417 else
2418 rtm->rtm_type = RTN_UNICAST;
2419 rtm->rtm_flags = 0;
2420 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2421 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2422 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4
LT
2423 rtm->rtm_protocol = RTPROT_REDIRECT;
2424 else if (rt->rt6i_flags & RTF_ADDRCONF)
2425 rtm->rtm_protocol = RTPROT_KERNEL;
38308473 2426 else if (rt->rt6i_flags & RTF_DEFAULT)
1da177e4
LT
2427 rtm->rtm_protocol = RTPROT_RA;
2428
38308473 2429 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2430 rtm->rtm_flags |= RTM_F_CLONED;
2431
2432 if (dst) {
2d7202bf 2433 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2434 rtm->rtm_dst_len = 128;
1da177e4 2435 } else if (rtm->rtm_dst_len)
2d7202bf 2436 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2437#ifdef CONFIG_IPV6_SUBTREES
2438 if (src) {
2d7202bf 2439 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2440 rtm->rtm_src_len = 128;
1da177e4 2441 } else if (rtm->rtm_src_len)
2d7202bf 2442 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2443#endif
7bc570c8
YH
2444 if (iif) {
2445#ifdef CONFIG_IPV6_MROUTE
2446 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2447 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2448 if (err <= 0) {
2449 if (!nowait) {
2450 if (err == 0)
2451 return 0;
2452 goto nla_put_failure;
2453 } else {
2454 if (err == -EMSGSIZE)
2455 goto nla_put_failure;
2456 }
2457 }
2458 } else
2459#endif
2460 NLA_PUT_U32(skb, RTA_IIF, iif);
2461 } else if (dst) {
1da177e4 2462 struct in6_addr saddr_buf;
c3968a85 2463 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2d7202bf 2464 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2465 }
2d7202bf 2466
c3968a85
DW
2467 if (rt->rt6i_prefsrc.plen) {
2468 struct in6_addr saddr_buf;
4e3fd7a0 2469 saddr_buf = rt->rt6i_prefsrc.addr;
c3968a85
DW
2470 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2471 }
2472
defb3519 2473 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2474 goto nla_put_failure;
2475
f2c31e32 2476 rcu_read_lock();
27217455 2477 n = dst_get_neighbour_noref(&rt->dst);
f2c31e32
ED
2478 if (n)
2479 NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2480 rcu_read_unlock();
2d7202bf 2481
d8d1f30b 2482 if (rt->dst.dev)
d1918542 2483 NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
2d7202bf
TG
2484
2485 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2486
36e3deae
YH
2487 if (!(rt->rt6i_flags & RTF_EXPIRES))
2488 expires = 0;
d1918542
DM
2489 else if (rt->dst.expires - jiffies < INT_MAX)
2490 expires = rt->dst.expires - jiffies;
36e3deae
YH
2491 else
2492 expires = INT_MAX;
69cdf8f9 2493
346f870b
DM
2494 peer = rt->rt6i_peer;
2495 ts = tsage = 0;
2496 if (peer && peer->tcp_ts_stamp) {
2497 ts = peer->tcp_ts;
2498 tsage = get_seconds() - peer->tcp_ts_stamp;
2499 }
2500
2501 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
d8d1f30b 2502 expires, rt->dst.error) < 0)
e3703b3d 2503 goto nla_put_failure;
2d7202bf
TG
2504
2505 return nlmsg_end(skb, nlh);
2506
2507nla_put_failure:
26932566
PM
2508 nlmsg_cancel(skb, nlh);
2509 return -EMSGSIZE;
1da177e4
LT
2510}
2511
1b43af54 2512int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2513{
2514 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2515 int prefix;
2516
2d7202bf
TG
2517 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2518 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2519 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2520 } else
2521 prefix = 0;
2522
191cd582
BH
2523 return rt6_fill_node(arg->net,
2524 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2525 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2526 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2527}
2528
c127ea2c 2529static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2530{
3b1e0a65 2531 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2532 struct nlattr *tb[RTA_MAX+1];
2533 struct rt6_info *rt;
1da177e4 2534 struct sk_buff *skb;
ab364a6f 2535 struct rtmsg *rtm;
4c9483b2 2536 struct flowi6 fl6;
ab364a6f 2537 int err, iif = 0;
1da177e4 2538
ab364a6f
TG
2539 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2540 if (err < 0)
2541 goto errout;
1da177e4 2542
ab364a6f 2543 err = -EINVAL;
4c9483b2 2544 memset(&fl6, 0, sizeof(fl6));
1da177e4 2545
ab364a6f
TG
2546 if (tb[RTA_SRC]) {
2547 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2548 goto errout;
2549
4e3fd7a0 2550 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2551 }
2552
2553 if (tb[RTA_DST]) {
2554 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2555 goto errout;
2556
4e3fd7a0 2557 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2558 }
2559
2560 if (tb[RTA_IIF])
2561 iif = nla_get_u32(tb[RTA_IIF]);
2562
2563 if (tb[RTA_OIF])
4c9483b2 2564 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2565
2566 if (iif) {
2567 struct net_device *dev;
5578689a 2568 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2569 if (!dev) {
2570 err = -ENODEV;
ab364a6f 2571 goto errout;
1da177e4
LT
2572 }
2573 }
2574
ab364a6f 2575 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2576 if (!skb) {
ab364a6f
TG
2577 err = -ENOBUFS;
2578 goto errout;
2579 }
1da177e4 2580
ab364a6f
TG
2581 /* Reserve room for dummy headers, this skb can pass
2582 through good chunk of routing engine.
2583 */
459a98ed 2584 skb_reset_mac_header(skb);
ab364a6f 2585 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2586
4c9483b2 2587 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
d8d1f30b 2588 skb_dst_set(skb, &rt->dst);
1da177e4 2589
4c9483b2 2590 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2591 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2592 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2593 if (err < 0) {
ab364a6f
TG
2594 kfree_skb(skb);
2595 goto errout;
1da177e4
LT
2596 }
2597
5578689a 2598 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2599errout:
1da177e4 2600 return err;
1da177e4
LT
2601}
2602
86872cb5 2603void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2604{
2605 struct sk_buff *skb;
5578689a 2606 struct net *net = info->nl_net;
528c4ceb
DL
2607 u32 seq;
2608 int err;
2609
2610 err = -ENOBUFS;
38308473 2611 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2612
339bf98f 2613 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2614 if (!skb)
21713ebc
TG
2615 goto errout;
2616
191cd582 2617 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2618 event, info->pid, seq, 0, 0, 0);
26932566
PM
2619 if (err < 0) {
2620 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2621 WARN_ON(err == -EMSGSIZE);
2622 kfree_skb(skb);
2623 goto errout;
2624 }
1ce85fe4
PNA
2625 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2626 info->nlh, gfp_any());
2627 return;
21713ebc
TG
2628errout:
2629 if (err < 0)
5578689a 2630 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2631}
2632
8ed67789
DL
2633static int ip6_route_dev_notify(struct notifier_block *this,
2634 unsigned long event, void *data)
2635{
2636 struct net_device *dev = (struct net_device *)data;
c346dca1 2637 struct net *net = dev_net(dev);
8ed67789
DL
2638
2639 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2640 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2641 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2642#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2643 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2644 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2645 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2646 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2647#endif
2648 }
2649
2650 return NOTIFY_OK;
2651}
2652
1da177e4
LT
2653/*
2654 * /proc
2655 */
2656
2657#ifdef CONFIG_PROC_FS
2658
1da177e4
LT
/*
 * Buffer-window bookkeeping for /proc output.
 * NOTE(review): nothing in the visible part of this file uses this
 * structure; confirm it is still needed.
 */
struct rt6_proc_arg {
	char	*buffer;
	int	offset;
	int	length;
	int	skip;
	int	len;
};
2667
2668static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2669{
33120b30 2670 struct seq_file *m = p_arg;
69cce1d1 2671 struct neighbour *n;
1da177e4 2672
4b7a4274 2673 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2674
2675#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2676 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2677#else
33120b30 2678 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2679#endif
f2c31e32 2680 rcu_read_lock();
27217455 2681 n = dst_get_neighbour_noref(&rt->dst);
69cce1d1
DM
2682 if (n) {
2683 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2684 } else {
33120b30 2685 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2686 }
f2c31e32 2687 rcu_read_unlock();
33120b30 2688 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2689 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2690 rt->dst.__use, rt->rt6i_flags,
d1918542 2691 rt->dst.dev ? rt->dst.dev->name : "");
1da177e4
LT
2692 return 0;
2693}
2694
33120b30 2695static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2696{
f3db4851 2697 struct net *net = (struct net *)m->private;
32b293a5 2698 fib6_clean_all_ro(net, rt6_info_route, 0, m);
33120b30
AD
2699 return 0;
2700}
1da177e4 2701
33120b30
AD
2702static int ipv6_route_open(struct inode *inode, struct file *file)
2703{
de05c557 2704 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2705}
2706
33120b30
AD
2707static const struct file_operations ipv6_route_proc_fops = {
2708 .owner = THIS_MODULE,
2709 .open = ipv6_route_open,
2710 .read = seq_read,
2711 .llseek = seq_lseek,
b6fcbdb4 2712 .release = single_release_net,
33120b30
AD
2713};
2714
1da177e4
LT
2715static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2716{
69ddb805 2717 struct net *net = (struct net *)seq->private;
1da177e4 2718 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2719 net->ipv6.rt6_stats->fib_nodes,
2720 net->ipv6.rt6_stats->fib_route_nodes,
2721 net->ipv6.rt6_stats->fib_rt_alloc,
2722 net->ipv6.rt6_stats->fib_rt_entries,
2723 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2724 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2725 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2726
2727 return 0;
2728}
2729
2730static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2731{
de05c557 2732 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2733}
2734
9a32144e 2735static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2736 .owner = THIS_MODULE,
2737 .open = rt6_stats_seq_open,
2738 .read = seq_read,
2739 .llseek = seq_lseek,
b6fcbdb4 2740 .release = single_release_net,
1da177e4
LT
2741};
2742#endif /* CONFIG_PROC_FS */
2743
2744#ifdef CONFIG_SYSCTL
2745
1da177e4 2746static
8d65af78 2747int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2748 void __user *buffer, size_t *lenp, loff_t *ppos)
2749{
c486da34
LAG
2750 struct net *net;
2751 int delay;
2752 if (!write)
1da177e4 2753 return -EINVAL;
c486da34
LAG
2754
2755 net = (struct net *)ctl->extra1;
2756 delay = net->ipv6.sysctl.flush_delay;
2757 proc_dointvec(ctl, write, buffer, lenp, ppos);
2758 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2759 return 0;
1da177e4
LT
2760}
2761
760f2d01 2762ctl_table ipv6_route_table_template[] = {
1ab1457c 2763 {
1da177e4 2764 .procname = "flush",
4990509f 2765 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2766 .maxlen = sizeof(int),
89c8b3a1 2767 .mode = 0200,
6d9f239a 2768 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2769 },
2770 {
1da177e4 2771 .procname = "gc_thresh",
9a7ec3a9 2772 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2773 .maxlen = sizeof(int),
2774 .mode = 0644,
6d9f239a 2775 .proc_handler = proc_dointvec,
1da177e4
LT
2776 },
2777 {
1da177e4 2778 .procname = "max_size",
4990509f 2779 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2780 .maxlen = sizeof(int),
2781 .mode = 0644,
6d9f239a 2782 .proc_handler = proc_dointvec,
1da177e4
LT
2783 },
2784 {
1da177e4 2785 .procname = "gc_min_interval",
4990509f 2786 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2787 .maxlen = sizeof(int),
2788 .mode = 0644,
6d9f239a 2789 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2790 },
2791 {
1da177e4 2792 .procname = "gc_timeout",
4990509f 2793 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2794 .maxlen = sizeof(int),
2795 .mode = 0644,
6d9f239a 2796 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2797 },
2798 {
1da177e4 2799 .procname = "gc_interval",
4990509f 2800 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2801 .maxlen = sizeof(int),
2802 .mode = 0644,
6d9f239a 2803 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2804 },
2805 {
1da177e4 2806 .procname = "gc_elasticity",
4990509f 2807 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2808 .maxlen = sizeof(int),
2809 .mode = 0644,
f3d3f616 2810 .proc_handler = proc_dointvec,
1da177e4
LT
2811 },
2812 {
1da177e4 2813 .procname = "mtu_expires",
4990509f 2814 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2815 .maxlen = sizeof(int),
2816 .mode = 0644,
6d9f239a 2817 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2818 },
2819 {
1da177e4 2820 .procname = "min_adv_mss",
4990509f 2821 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2822 .maxlen = sizeof(int),
2823 .mode = 0644,
f3d3f616 2824 .proc_handler = proc_dointvec,
1da177e4
LT
2825 },
2826 {
1da177e4 2827 .procname = "gc_min_interval_ms",
4990509f 2828 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2829 .maxlen = sizeof(int),
2830 .mode = 0644,
6d9f239a 2831 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2832 },
f8572d8f 2833 { }
1da177e4
LT
2834};
2835
2c8c1e72 2836struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2837{
2838 struct ctl_table *table;
2839
2840 table = kmemdup(ipv6_route_table_template,
2841 sizeof(ipv6_route_table_template),
2842 GFP_KERNEL);
5ee09105
YH
2843
2844 if (table) {
2845 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2846 table[0].extra1 = net;
86393e52 2847 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2848 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2849 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2850 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2851 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2852 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2853 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2854 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2855 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2856 }
2857
760f2d01
DL
2858 return table;
2859}
1da177e4
LT
2860#endif
2861
2c8c1e72 2862static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2863{
633d424b 2864 int ret = -ENOMEM;
8ed67789 2865
86393e52
AD
2866 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2867 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2868
fc66f95c
ED
2869 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2870 goto out_ip6_dst_ops;
2871
8ed67789
DL
2872 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2873 sizeof(*net->ipv6.ip6_null_entry),
2874 GFP_KERNEL);
2875 if (!net->ipv6.ip6_null_entry)
fc66f95c 2876 goto out_ip6_dst_entries;
d8d1f30b 2877 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2878 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2879 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2880 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2881 ip6_template_metrics, true);
8ed67789
DL
2882
2883#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2884 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2885 sizeof(*net->ipv6.ip6_prohibit_entry),
2886 GFP_KERNEL);
68fffc67
PZ
2887 if (!net->ipv6.ip6_prohibit_entry)
2888 goto out_ip6_null_entry;
d8d1f30b 2889 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2890 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2891 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2892 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2893 ip6_template_metrics, true);
8ed67789
DL
2894
2895 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2896 sizeof(*net->ipv6.ip6_blk_hole_entry),
2897 GFP_KERNEL);
68fffc67
PZ
2898 if (!net->ipv6.ip6_blk_hole_entry)
2899 goto out_ip6_prohibit_entry;
d8d1f30b 2900 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2901 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2902 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2903 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2904 ip6_template_metrics, true);
8ed67789
DL
2905#endif
2906
b339a47c
PZ
2907 net->ipv6.sysctl.flush_delay = 0;
2908 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2909 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2910 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2911 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2912 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2913 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2914 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2915
cdb18761
DL
2916#ifdef CONFIG_PROC_FS
2917 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2918 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2919#endif
6891a346
BT
2920 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2921
8ed67789
DL
2922 ret = 0;
2923out:
2924 return ret;
f2fc6a54 2925
68fffc67
PZ
2926#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2927out_ip6_prohibit_entry:
2928 kfree(net->ipv6.ip6_prohibit_entry);
2929out_ip6_null_entry:
2930 kfree(net->ipv6.ip6_null_entry);
2931#endif
fc66f95c
ED
2932out_ip6_dst_entries:
2933 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2934out_ip6_dst_ops:
f2fc6a54 2935 goto out;
cdb18761
DL
2936}
2937
2c8c1e72 2938static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2939{
2940#ifdef CONFIG_PROC_FS
2941 proc_net_remove(net, "ipv6_route");
2942 proc_net_remove(net, "rt6_stats");
2943#endif
8ed67789
DL
2944 kfree(net->ipv6.ip6_null_entry);
2945#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2946 kfree(net->ipv6.ip6_prohibit_entry);
2947 kfree(net->ipv6.ip6_blk_hole_entry);
2948#endif
41bb78b4 2949 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2950}
2951
2952static struct pernet_operations ip6_route_net_ops = {
2953 .init = ip6_route_net_init,
2954 .exit = ip6_route_net_exit,
2955};
2956
8ed67789
DL
2957static struct notifier_block ip6_route_dev_notifier = {
2958 .notifier_call = ip6_route_dev_notify,
2959 .priority = 0,
2960};
2961
433d49c3 2962int __init ip6_route_init(void)
1da177e4 2963{
433d49c3
DL
2964 int ret;
2965
9a7ec3a9
DL
2966 ret = -ENOMEM;
2967 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2968 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2969 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2970 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2971 goto out;
14e50e57 2972
fc66f95c 2973 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2974 if (ret)
bdb3289f 2975 goto out_kmem_cache;
bdb3289f 2976
fc66f95c
ED
2977 ret = register_pernet_subsys(&ip6_route_net_ops);
2978 if (ret)
2979 goto out_dst_entries;
2980
5dc121e9
AE
2981 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2982
8ed67789
DL
2983 /* Registering of the loopback is done before this portion of code,
2984 * the loopback reference in rt6_info will not be taken, do it
2985 * manually for init_net */
d8d1f30b 2986 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2987 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2988 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2989 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2990 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2991 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2992 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2993 #endif
433d49c3
DL
2994 ret = fib6_init();
2995 if (ret)
8ed67789 2996 goto out_register_subsys;
433d49c3 2997
433d49c3
DL
2998 ret = xfrm6_init();
2999 if (ret)
cdb18761 3000 goto out_fib6_init;
c35b7e72 3001
433d49c3
DL
3002 ret = fib6_rules_init();
3003 if (ret)
3004 goto xfrm6_init;
7e5449c2 3005
433d49c3 3006 ret = -ENOBUFS;
c7ac8679
GR
3007 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3008 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3009 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
433d49c3 3010 goto fib6_rules_init;
c127ea2c 3011
8ed67789 3012 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
3013 if (ret)
3014 goto fib6_rules_init;
8ed67789 3015
433d49c3
DL
3016out:
3017 return ret;
3018
3019fib6_rules_init:
433d49c3
DL
3020 fib6_rules_cleanup();
3021xfrm6_init:
433d49c3 3022 xfrm6_fini();
433d49c3 3023out_fib6_init:
433d49c3 3024 fib6_gc_cleanup();
8ed67789
DL
3025out_register_subsys:
3026 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
3027out_dst_entries:
3028 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3029out_kmem_cache:
f2fc6a54 3030 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3031 goto out;
1da177e4
LT
3032}
3033
3034void ip6_route_cleanup(void)
3035{
8ed67789 3036 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 3037 fib6_rules_cleanup();
1da177e4 3038 xfrm6_fini();
1da177e4 3039 fib6_gc_cleanup();
8ed67789 3040 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3041 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3042 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3043}