]> git.ipfire.org Git - people/ms/linux.git/blame - net/ipv6/route.c
ipv4: Handle PMTU in all ICMP error handlers.
[people/ms/linux.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
8d71740c 58#include <net/netevent.h>
21713ebc 59#include <net/netlink.h>
1da177e4
LT
60
61#include <asm/uaccess.h>
62
63#ifdef CONFIG_SYSCTL
64#include <linux/sysctl.h>
65#endif
66
1716a961 67static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 68 const struct in6_addr *dest);
1da177e4 69static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 70static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 71static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
72static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73static void ip6_dst_destroy(struct dst_entry *);
74static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
569d3645 76static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
77
78static int ip6_pkt_discard(struct sk_buff *skb);
79static int ip6_pkt_discard_out(struct sk_buff *skb);
80static void ip6_link_failure(struct sk_buff *skb);
81static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
82
70ceb4f5 83#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 84static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
85 const struct in6_addr *prefix, int prefixlen,
86 const struct in6_addr *gwaddr, int ifindex,
95c96174 87 unsigned int pref);
efa2cea0 88static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
89 const struct in6_addr *prefix, int prefixlen,
90 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
91#endif
92
06582540
DM
93static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
94{
95 struct rt6_info *rt = (struct rt6_info *) dst;
96 struct inet_peer *peer;
97 u32 *p = NULL;
98
8e2ec639
YZ
99 if (!(rt->dst.flags & DST_HOST))
100 return NULL;
101
fbfe95a4 102 peer = rt6_get_peer_create(rt);
06582540
DM
103 if (peer) {
104 u32 *old_p = __DST_METRICS_PTR(old);
105 unsigned long prev, new;
106
107 p = peer->metrics;
108 if (inet_metrics_new(peer))
109 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
110
111 new = (unsigned long) p;
112 prev = cmpxchg(&dst->_metrics, old, new);
113
114 if (prev != old) {
115 p = __DST_METRICS_PTR(prev);
116 if (prev & DST_METRICS_READ_ONLY)
117 p = NULL;
118 }
119 }
120 return p;
121}
122
39232973
DM
123static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
124{
125 struct in6_addr *p = &rt->rt6i_gateway;
126
a7563f34 127 if (!ipv6_addr_any(p))
39232973
DM
128 return (const void *) p;
129 return daddr;
130}
131
d3aaeb38
DM
132static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
133{
39232973
DM
134 struct rt6_info *rt = (struct rt6_info *) dst;
135 struct neighbour *n;
136
137 daddr = choose_neigh_daddr(rt, daddr);
138 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
f83c7790
DM
139 if (n)
140 return n;
141 return neigh_create(&nd_tbl, daddr, dst->dev);
142}
143
8ade06c6 144static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
f83c7790 145{
8ade06c6
DM
146 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
147 if (!n) {
148 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
149 if (IS_ERR(n))
150 return PTR_ERR(n);
151 }
f83c7790
DM
152 dst_set_neighbour(&rt->dst, n);
153
154 return 0;
d3aaeb38
DM
155}
156
9a7ec3a9 157static struct dst_ops ip6_dst_ops_template = {
1da177e4 158 .family = AF_INET6,
09640e63 159 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
160 .gc = ip6_dst_gc,
161 .gc_thresh = 1024,
162 .check = ip6_dst_check,
0dbaee3b 163 .default_advmss = ip6_default_advmss,
ebb762f2 164 .mtu = ip6_mtu,
06582540 165 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
166 .destroy = ip6_dst_destroy,
167 .ifdown = ip6_dst_ifdown,
168 .negative_advice = ip6_negative_advice,
169 .link_failure = ip6_link_failure,
170 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 171 .local_out = __ip6_local_out,
d3aaeb38 172 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
173};
174
ebb762f2 175static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 176{
618f9bc7
SK
177 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
178
179 return mtu ? : dst->dev->mtu;
ec831ea7
RD
180}
181
14e50e57
DM
182static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
183{
184}
185
0972ddb2
HB
186static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
187 unsigned long old)
188{
189 return NULL;
190}
191
14e50e57
DM
192static struct dst_ops ip6_dst_blackhole_ops = {
193 .family = AF_INET6,
09640e63 194 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
195 .destroy = ip6_dst_destroy,
196 .check = ip6_dst_check,
ebb762f2 197 .mtu = ip6_blackhole_mtu,
214f45c9 198 .default_advmss = ip6_default_advmss,
14e50e57 199 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 200 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 201 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
202};
203
62fa8a84
DM
204static const u32 ip6_template_metrics[RTAX_MAX] = {
205 [RTAX_HOPLIMIT - 1] = 255,
206};
207
bdb3289f 208static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
209 .dst = {
210 .__refcnt = ATOMIC_INIT(1),
211 .__use = 1,
212 .obsolete = -1,
213 .error = -ENETUNREACH,
d8d1f30b
CG
214 .input = ip6_pkt_discard,
215 .output = ip6_pkt_discard_out,
1da177e4
LT
216 },
217 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 218 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
219 .rt6i_metric = ~(u32) 0,
220 .rt6i_ref = ATOMIC_INIT(1),
221};
222
101367c2
TG
223#ifdef CONFIG_IPV6_MULTIPLE_TABLES
224
6723ab54
DM
225static int ip6_pkt_prohibit(struct sk_buff *skb);
226static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 227
280a34c8 228static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
229 .dst = {
230 .__refcnt = ATOMIC_INIT(1),
231 .__use = 1,
232 .obsolete = -1,
233 .error = -EACCES,
d8d1f30b
CG
234 .input = ip6_pkt_prohibit,
235 .output = ip6_pkt_prohibit_out,
101367c2
TG
236 },
237 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 238 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
239 .rt6i_metric = ~(u32) 0,
240 .rt6i_ref = ATOMIC_INIT(1),
241};
242
bdb3289f 243static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
244 .dst = {
245 .__refcnt = ATOMIC_INIT(1),
246 .__use = 1,
247 .obsolete = -1,
248 .error = -EINVAL,
d8d1f30b
CG
249 .input = dst_discard,
250 .output = dst_discard,
101367c2
TG
251 },
252 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 253 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
254 .rt6i_metric = ~(u32) 0,
255 .rt6i_ref = ATOMIC_INIT(1),
256};
257
258#endif
259
1da177e4 260/* allocate dst with ip6_dst_ops */
97bab73f 261static inline struct rt6_info *ip6_dst_alloc(struct net *net,
957c665f 262 struct net_device *dev,
8b96d22d
DM
263 int flags,
264 struct fib6_table *table)
1da177e4 265{
97bab73f
DM
266 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
267 0, 0, flags);
cf911662 268
97bab73f 269 if (rt) {
fbe58186 270 memset(&rt->rt6i_table, 0,
38308473 271 sizeof(*rt) - sizeof(struct dst_entry));
8b96d22d 272 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
97bab73f 273 }
cf911662 274 return rt;
1da177e4
LT
275}
276
277static void ip6_dst_destroy(struct dst_entry *dst)
278{
279 struct rt6_info *rt = (struct rt6_info *)dst;
280 struct inet6_dev *idev = rt->rt6i_idev;
281
8e2ec639
YZ
282 if (!(rt->dst.flags & DST_HOST))
283 dst_destroy_metrics_generic(dst);
284
38308473 285 if (idev) {
1da177e4
LT
286 rt->rt6i_idev = NULL;
287 in6_dev_put(idev);
1ab1457c 288 }
1716a961
G
289
290 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
291 dst_release(dst->from);
292
97bab73f
DM
293 if (rt6_has_peer(rt)) {
294 struct inet_peer *peer = rt6_peer_ptr(rt);
b3419363
DM
295 inet_putpeer(peer);
296 }
297}
298
6431cbc2
DM
299static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
300
301static u32 rt6_peer_genid(void)
302{
303 return atomic_read(&__rt6_peer_genid);
304}
305
b3419363
DM
306void rt6_bind_peer(struct rt6_info *rt, int create)
307{
97bab73f 308 struct inet_peer_base *base;
b3419363
DM
309 struct inet_peer *peer;
310
97bab73f
DM
311 base = inetpeer_base_ptr(rt->_rt6i_peer);
312 if (!base)
313 return;
314
315 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
7b34ca2a
DM
316 if (peer) {
317 if (!rt6_set_peer(rt, peer))
318 inet_putpeer(peer);
319 else
320 rt->rt6i_peer_genid = rt6_peer_genid();
321 }
1da177e4
LT
322}
323
324static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
325 int how)
326{
327 struct rt6_info *rt = (struct rt6_info *)dst;
328 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 329 struct net_device *loopback_dev =
c346dca1 330 dev_net(dev)->loopback_dev;
1da177e4 331
38308473 332 if (dev != loopback_dev && idev && idev->dev == dev) {
5a3e55d6
DL
333 struct inet6_dev *loopback_idev =
334 in6_dev_get(loopback_dev);
38308473 335 if (loopback_idev) {
1da177e4
LT
336 rt->rt6i_idev = loopback_idev;
337 in6_dev_put(idev);
338 }
339 }
340}
341
a50feda5 342static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 343{
1716a961
G
344 struct rt6_info *ort = NULL;
345
346 if (rt->rt6i_flags & RTF_EXPIRES) {
347 if (time_after(jiffies, rt->dst.expires))
a50feda5 348 return true;
1716a961
G
349 } else if (rt->dst.from) {
350 ort = (struct rt6_info *) rt->dst.from;
351 return (ort->rt6i_flags & RTF_EXPIRES) &&
352 time_after(jiffies, ort->dst.expires);
353 }
a50feda5 354 return false;
1da177e4
LT
355}
356
a50feda5 357static bool rt6_need_strict(const struct in6_addr *daddr)
c71099ac 358{
a02cec21
ED
359 return ipv6_addr_type(daddr) &
360 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
361}
362
1da177e4 363/*
c71099ac 364 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
365 */
366
8ed67789
DL
367static inline struct rt6_info *rt6_device_match(struct net *net,
368 struct rt6_info *rt,
b71d1d42 369 const struct in6_addr *saddr,
1da177e4 370 int oif,
d420895e 371 int flags)
1da177e4
LT
372{
373 struct rt6_info *local = NULL;
374 struct rt6_info *sprt;
375
dd3abc4e
YH
376 if (!oif && ipv6_addr_any(saddr))
377 goto out;
378
d8d1f30b 379 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 380 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
381
382 if (oif) {
1da177e4
LT
383 if (dev->ifindex == oif)
384 return sprt;
385 if (dev->flags & IFF_LOOPBACK) {
38308473 386 if (!sprt->rt6i_idev ||
1da177e4 387 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 388 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 389 continue;
1ab1457c 390 if (local && (!oif ||
1da177e4
LT
391 local->rt6i_idev->dev->ifindex == oif))
392 continue;
393 }
394 local = sprt;
395 }
dd3abc4e
YH
396 } else {
397 if (ipv6_chk_addr(net, saddr, dev,
398 flags & RT6_LOOKUP_F_IFACE))
399 return sprt;
1da177e4 400 }
dd3abc4e 401 }
1da177e4 402
dd3abc4e 403 if (oif) {
1da177e4
LT
404 if (local)
405 return local;
406
d420895e 407 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 408 return net->ipv6.ip6_null_entry;
1da177e4 409 }
dd3abc4e 410out:
1da177e4
LT
411 return rt;
412}
413
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * rt6_probe - Router Reachability Probing for a candidate route.
 * @rt: route whose next hop should be probed; may be NULL
 *
 * If the route's neighbour entry is not in a NUD_VALID state and more than
 * rtr_probe_interval jiffies have passed since it was last updated, send a
 * Neighbour Solicitation to the solicited-node multicast address of the
 * neighbour.  The original comment requires probing to be rate-limited to
 * no more than one per minute; neigh->updated is the rate-limit timestamp.
 *
 * neigh->updated is modified here, so neigh->lock is taken for writing.
 * Holding it only for reading would let two concurrent probers both pass
 * the time_after() test and both store the timestamp.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
	/* Cheap unlocked test first; repeated under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		/* Drop the lock before transmitting. */
		write_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF no reachability probing is done. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
453
1da177e4 454/*
554cfb7e 455 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 456 */
b6f99a21 457static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 458{
d1918542 459 struct net_device *dev = rt->dst.dev;
161980f4 460 if (!oif || dev->ifindex == oif)
554cfb7e 461 return 2;
161980f4
DM
462 if ((dev->flags & IFF_LOOPBACK) &&
463 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
464 return 1;
465 return 0;
554cfb7e 466}
1da177e4 467
b6f99a21 468static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 469{
f2c31e32 470 struct neighbour *neigh;
398bcbeb 471 int m;
f2c31e32
ED
472
473 rcu_read_lock();
27217455 474 neigh = dst_get_neighbour_noref(&rt->dst);
4d0c5911
YH
475 if (rt->rt6i_flags & RTF_NONEXTHOP ||
476 !(rt->rt6i_flags & RTF_GATEWAY))
477 m = 1;
478 else if (neigh) {
554cfb7e
YH
479 read_lock_bh(&neigh->lock);
480 if (neigh->nud_state & NUD_VALID)
4d0c5911 481 m = 2;
398bcbeb
YH
482#ifdef CONFIG_IPV6_ROUTER_PREF
483 else if (neigh->nud_state & NUD_FAILED)
484 m = 0;
485#endif
486 else
ea73ee23 487 m = 1;
554cfb7e 488 read_unlock_bh(&neigh->lock);
398bcbeb
YH
489 } else
490 m = 0;
f2c31e32 491 rcu_read_unlock();
554cfb7e 492 return m;
1da177e4
LT
493}
494
554cfb7e
YH
495static int rt6_score_route(struct rt6_info *rt, int oif,
496 int strict)
1da177e4 497{
4d0c5911 498 int m, n;
1ab1457c 499
4d0c5911 500 m = rt6_check_dev(rt, oif);
77d16f45 501 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 502 return -1;
ebacaaa0
YH
503#ifdef CONFIG_IPV6_ROUTER_PREF
504 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
505#endif
4d0c5911 506 n = rt6_check_neigh(rt);
557e92ef 507 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
508 return -1;
509 return m;
510}
511
f11e6659
DM
512static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
513 int *mpri, struct rt6_info *match)
554cfb7e 514{
f11e6659
DM
515 int m;
516
517 if (rt6_check_expired(rt))
518 goto out;
519
520 m = rt6_score_route(rt, oif, strict);
521 if (m < 0)
522 goto out;
523
524 if (m > *mpri) {
525 if (strict & RT6_LOOKUP_F_REACHABLE)
526 rt6_probe(match);
527 *mpri = m;
528 match = rt;
529 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
530 rt6_probe(rt);
531 }
532
533out:
534 return match;
535}
536
537static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
538 struct rt6_info *rr_head,
539 u32 metric, int oif, int strict)
540{
541 struct rt6_info *rt, *match;
554cfb7e 542 int mpri = -1;
1da177e4 543
f11e6659
DM
544 match = NULL;
545 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 546 rt = rt->dst.rt6_next)
f11e6659
DM
547 match = find_match(rt, oif, strict, &mpri, match);
548 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 549 rt = rt->dst.rt6_next)
f11e6659 550 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 551
f11e6659
DM
552 return match;
553}
1da177e4 554
f11e6659
DM
555static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
556{
557 struct rt6_info *match, *rt0;
8ed67789 558 struct net *net;
1da177e4 559
f11e6659
DM
560 rt0 = fn->rr_ptr;
561 if (!rt0)
562 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 563
f11e6659 564 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 565
554cfb7e 566 if (!match &&
f11e6659 567 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 568 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 569
554cfb7e 570 /* no entries matched; do round-robin */
f11e6659
DM
571 if (!next || next->rt6i_metric != rt0->rt6i_metric)
572 next = fn->leaf;
573
574 if (next != rt0)
575 fn->rr_ptr = next;
1da177e4 576 }
1da177e4 577
d1918542 578 net = dev_net(rt0->dst.dev);
a02cec21 579 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
580}
581
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * rt6_route_rcv - process a Route Information option.
 * @dev:    device the option was received on
 * @opt:    start of the option (interpreted as struct route_info)
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Validates the option, then adds, refreshes or (lifetime 0) deletes the
 * matching RTF_ROUTEINFO route.  The length field is presumably in 8-octet
 * units as in RFC 4191; the code treats length 3 as carrying a complete
 * in6_addr prefix.
 *
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* Shorter option: copy only prefix_len bits into a local buffer. */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime))
		rt6_set_expires(rt, jiffies + HZ * lifetime);
	else
		rt6_clean_expires(rt);

	dst_release(&rt->dst);
	return 0;
}
#endif
654
/*
 * BACKTRACK - climb the FIB tree after a lookup produced the null entry.
 *
 * Must be expanded inside a function that provides the locals `rt` and
 * `fn` and the labels `restart` and `out`.  If `rt` is the null entry of
 * __net, walk from `fn` towards the root: when the parent has a subtree
 * that is not `fn` itself, look @saddr up in that subtree, otherwise step
 * to the parent.  Jumps to `restart` at the first node flagged RTN_RTINFO
 * and to `out` when a node flagged RTN_TL_ROOT is reached.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn),	\
						 NULL, (saddr));	\
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 672
8ed67789
DL
673static struct rt6_info *ip6_pol_route_lookup(struct net *net,
674 struct fib6_table *table,
4c9483b2 675 struct flowi6 *fl6, int flags)
1da177e4
LT
676{
677 struct fib6_node *fn;
678 struct rt6_info *rt;
679
c71099ac 680 read_lock_bh(&table->tb6_lock);
4c9483b2 681 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
682restart:
683 rt = fn->leaf;
4c9483b2
DM
684 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
685 BACKTRACK(net, &fl6->saddr);
c71099ac 686out:
d8d1f30b 687 dst_use(&rt->dst, jiffies);
c71099ac 688 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
689 return rt;
690
691}
692
ea6e574e
FW
693struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
694 int flags)
695{
696 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
697}
698EXPORT_SYMBOL_GPL(ip6_route_lookup);
699
9acd9f3a
YH
700struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
701 const struct in6_addr *saddr, int oif, int strict)
c71099ac 702{
4c9483b2
DM
703 struct flowi6 fl6 = {
704 .flowi6_oif = oif,
705 .daddr = *daddr,
c71099ac
TG
706 };
707 struct dst_entry *dst;
77d16f45 708 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 709
adaa70bb 710 if (saddr) {
4c9483b2 711 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
712 flags |= RT6_LOOKUP_F_HAS_SADDR;
713 }
714
4c9483b2 715 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
716 if (dst->error == 0)
717 return (struct rt6_info *) dst;
718
719 dst_release(dst);
720
1da177e4
LT
721 return NULL;
722}
723
7159039a
YH
724EXPORT_SYMBOL(rt6_lookup);
725
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to it, the route may be destroyed.
 */
731
86872cb5 732static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
733{
734 int err;
c71099ac 735 struct fib6_table *table;
1da177e4 736
c71099ac
TG
737 table = rt->rt6i_table;
738 write_lock_bh(&table->tb6_lock);
86872cb5 739 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 740 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
741
742 return err;
743}
744
40e22e8f
TG
745int ip6_ins_rt(struct rt6_info *rt)
746{
4d1169c1 747 struct nl_info info = {
d1918542 748 .nl_net = dev_net(rt->dst.dev),
4d1169c1 749 };
528c4ceb 750 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
751}
752
1716a961 753static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
21efcfa0 754 const struct in6_addr *daddr,
b71d1d42 755 const struct in6_addr *saddr)
1da177e4 756{
1da177e4
LT
757 struct rt6_info *rt;
758
759 /*
760 * Clone the route.
761 */
762
21efcfa0 763 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
764
765 if (rt) {
14deae41
DM
766 int attempts = !in_softirq();
767
38308473 768 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 769 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 770 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 771 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 772 rt->rt6i_gateway = *daddr;
58c4fb86 773 }
1da177e4 774
1da177e4 775 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
776
777#ifdef CONFIG_IPV6_SUBTREES
778 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 779 rt->rt6i_src.addr = *saddr;
1da177e4
LT
780 rt->rt6i_src.plen = 128;
781 }
782#endif
783
14deae41 784 retry:
8ade06c6 785 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
d1918542 786 struct net *net = dev_net(rt->dst.dev);
14deae41
DM
787 int saved_rt_min_interval =
788 net->ipv6.sysctl.ip6_rt_gc_min_interval;
789 int saved_rt_elasticity =
790 net->ipv6.sysctl.ip6_rt_gc_elasticity;
791
792 if (attempts-- > 0) {
793 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
794 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
795
86393e52 796 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
797
798 net->ipv6.sysctl.ip6_rt_gc_elasticity =
799 saved_rt_elasticity;
800 net->ipv6.sysctl.ip6_rt_gc_min_interval =
801 saved_rt_min_interval;
802 goto retry;
803 }
804
f3213831 805 net_warn_ratelimited("Neighbour table overflow\n");
d8d1f30b 806 dst_free(&rt->dst);
14deae41
DM
807 return NULL;
808 }
95a9a5ba 809 }
1da177e4 810
95a9a5ba
YH
811 return rt;
812}
1da177e4 813
21efcfa0
ED
814static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
815 const struct in6_addr *daddr)
299d9939 816{
21efcfa0
ED
817 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
818
299d9939 819 if (rt) {
299d9939 820 rt->rt6i_flags |= RTF_CACHE;
27217455 821 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
299d9939
YH
822 }
823 return rt;
824}
825
8ed67789 826static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 827 struct flowi6 *fl6, int flags)
1da177e4
LT
828{
829 struct fib6_node *fn;
519fbd87 830 struct rt6_info *rt, *nrt;
c71099ac 831 int strict = 0;
1da177e4 832 int attempts = 3;
519fbd87 833 int err;
53b7997f 834 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 835
77d16f45 836 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
837
838relookup:
c71099ac 839 read_lock_bh(&table->tb6_lock);
1da177e4 840
8238dd06 841restart_2:
4c9483b2 842 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
843
844restart:
4acad72d 845 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 846
4c9483b2 847 BACKTRACK(net, &fl6->saddr);
8ed67789 848 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 849 rt->rt6i_flags & RTF_CACHE)
1ddef044 850 goto out;
1da177e4 851
d8d1f30b 852 dst_hold(&rt->dst);
c71099ac 853 read_unlock_bh(&table->tb6_lock);
fb9de91e 854
27217455 855 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 856 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 857 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 858 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
859 else
860 goto out2;
e40cf353 861
d8d1f30b 862 dst_release(&rt->dst);
8ed67789 863 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 864
d8d1f30b 865 dst_hold(&rt->dst);
519fbd87 866 if (nrt) {
40e22e8f 867 err = ip6_ins_rt(nrt);
519fbd87 868 if (!err)
1da177e4 869 goto out2;
1da177e4 870 }
1da177e4 871
519fbd87
YH
872 if (--attempts <= 0)
873 goto out2;
874
875 /*
c71099ac 876 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
877 * released someone could insert this route. Relookup.
878 */
d8d1f30b 879 dst_release(&rt->dst);
519fbd87
YH
880 goto relookup;
881
882out:
8238dd06
YH
883 if (reachable) {
884 reachable = 0;
885 goto restart_2;
886 }
d8d1f30b 887 dst_hold(&rt->dst);
c71099ac 888 read_unlock_bh(&table->tb6_lock);
1da177e4 889out2:
d8d1f30b
CG
890 rt->dst.lastuse = jiffies;
891 rt->dst.__use++;
c71099ac
TG
892
893 return rt;
1da177e4
LT
894}
895
8ed67789 896static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 897 struct flowi6 *fl6, int flags)
4acad72d 898{
4c9483b2 899 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
900}
901
72331bc0
SL
902static struct dst_entry *ip6_route_input_lookup(struct net *net,
903 struct net_device *dev,
904 struct flowi6 *fl6, int flags)
905{
906 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
907 flags |= RT6_LOOKUP_F_IFACE;
908
909 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
910}
911
c71099ac
TG
912void ip6_route_input(struct sk_buff *skb)
913{
b71d1d42 914 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 915 struct net *net = dev_net(skb->dev);
adaa70bb 916 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
917 struct flowi6 fl6 = {
918 .flowi6_iif = skb->dev->ifindex,
919 .daddr = iph->daddr,
920 .saddr = iph->saddr,
38308473 921 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
922 .flowi6_mark = skb->mark,
923 .flowi6_proto = iph->nexthdr,
c71099ac 924 };
adaa70bb 925
72331bc0 926 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
927}
928
8ed67789 929static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 930 struct flowi6 *fl6, int flags)
1da177e4 931{
4c9483b2 932 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
933}
934
9c7a4f9c 935struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 936 struct flowi6 *fl6)
c71099ac
TG
937{
938 int flags = 0;
939
4c9483b2 940 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 941 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 942
4c9483b2 943 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 944 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
945 else if (sk)
946 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 947
4c9483b2 948 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
949}
950
7159039a 951EXPORT_SYMBOL(ip6_route_output);
1da177e4 952
2774c131 953struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 954{
5c1e6aa3 955 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
956 struct dst_entry *new = NULL;
957
5c1e6aa3 958 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 959 if (rt) {
cf911662 960 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
97bab73f 961 rt6_init_peer(rt, net->ipv6.peers);
cf911662 962
d8d1f30b 963 new = &rt->dst;
14e50e57 964
14e50e57 965 new->__use = 1;
352e512c
HX
966 new->input = dst_discard;
967 new->output = dst_discard;
14e50e57 968
21efcfa0
ED
969 if (dst_metrics_read_only(&ort->dst))
970 new->_metrics = ort->dst._metrics;
971 else
972 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
973 rt->rt6i_idev = ort->rt6i_idev;
974 if (rt->rt6i_idev)
975 in6_dev_hold(rt->rt6i_idev);
14e50e57 976
4e3fd7a0 977 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
978 rt->rt6i_flags = ort->rt6i_flags;
979 rt6_clean_expires(rt);
14e50e57
DM
980 rt->rt6i_metric = 0;
981
982 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
983#ifdef CONFIG_IPV6_SUBTREES
984 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
985#endif
986
987 dst_free(new);
988 }
989
69ead7af
DM
990 dst_release(dst_orig);
991 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 992}
14e50e57 993
1da177e4
LT
994/*
995 * Destination cache support functions
996 */
997
998static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
999{
1000 struct rt6_info *rt;
1001
1002 rt = (struct rt6_info *) dst;
1003
6431cbc2
DM
1004 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1005 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
97bab73f 1006 if (!rt6_has_peer(rt))
6431cbc2
DM
1007 rt6_bind_peer(rt, 0);
1008 rt->rt6i_peer_genid = rt6_peer_genid();
1009 }
1da177e4 1010 return dst;
6431cbc2 1011 }
1da177e4
LT
1012 return NULL;
1013}
1014
1015static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1016{
1017 struct rt6_info *rt = (struct rt6_info *) dst;
1018
1019 if (rt) {
54c1a859
YH
1020 if (rt->rt6i_flags & RTF_CACHE) {
1021 if (rt6_check_expired(rt)) {
1022 ip6_del_rt(rt);
1023 dst = NULL;
1024 }
1025 } else {
1da177e4 1026 dst_release(dst);
54c1a859
YH
1027 dst = NULL;
1028 }
1da177e4 1029 }
54c1a859 1030 return dst;
1da177e4
LT
1031}
1032
1033static void ip6_link_failure(struct sk_buff *skb)
1034{
1035 struct rt6_info *rt;
1036
3ffe533c 1037 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1038
adf30907 1039 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1040 if (rt) {
1716a961
G
1041 if (rt->rt6i_flags & RTF_CACHE)
1042 rt6_update_expires(rt, 0);
1043 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1da177e4
LT
1044 rt->rt6i_node->fn_sernum = -1;
1045 }
1046}
1047
1048static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1049{
1050 struct rt6_info *rt6 = (struct rt6_info*)dst;
1051
1052 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1053 rt6->rt6i_flags |= RTF_MODIFIED;
1054 if (mtu < IPV6_MIN_MTU) {
defb3519 1055 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1056 mtu = IPV6_MIN_MTU;
defb3519
DM
1057 features |= RTAX_FEATURE_ALLFRAG;
1058 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1059 }
defb3519 1060 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
1061 }
1062}
1063
0dbaee3b 1064static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1065{
0dbaee3b
DM
1066 struct net_device *dev = dst->dev;
1067 unsigned int mtu = dst_mtu(dst);
1068 struct net *net = dev_net(dev);
1069
1da177e4
LT
1070 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1071
5578689a
DL
1072 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1073 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1074
1075 /*
1ab1457c
YH
1076 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1077 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1078 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1079 * rely only on pmtu discovery"
1080 */
1081 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1082 mtu = IPV6_MAXPLEN;
1083 return mtu;
1084}
1085
ebb762f2 1086static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1087{
d33e4553 1088 struct inet6_dev *idev;
618f9bc7
SK
1089 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1090
1091 if (mtu)
1092 return mtu;
1093
1094 mtu = IPV6_MIN_MTU;
d33e4553
DM
1095
1096 rcu_read_lock();
1097 idev = __in6_dev_get(dst->dev);
1098 if (idev)
1099 mtu = idev->cnf.mtu6;
1100 rcu_read_unlock();
1101
1102 return mtu;
1103}
1104
3b00944c
YH
/*
 * Head of a singly linked list (chained through dst->next) of the dst
 * entries created by icmp6_dst_alloc(); walked by icmp6_dst_gc() and
 * icmp6_clean_all().
 */
1105static struct dst_entry *icmp6_dst_gc_list;
/* Taken (with BHs disabled) around every access to icmp6_dst_gc_list in this file. */
1106static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1107
3b00944c 1108struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1109 struct neighbour *neigh,
87a11578 1110 struct flowi6 *fl6)
1da177e4 1111{
87a11578 1112 struct dst_entry *dst;
1da177e4
LT
1113 struct rt6_info *rt;
1114 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1115 struct net *net = dev_net(dev);
1da177e4 1116
38308473 1117 if (unlikely(!idev))
122bdf67 1118 return ERR_PTR(-ENODEV);
1da177e4 1119
8b96d22d 1120 rt = ip6_dst_alloc(net, dev, 0, NULL);
38308473 1121 if (unlikely(!rt)) {
1da177e4 1122 in6_dev_put(idev);
87a11578 1123 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1124 goto out;
1125 }
1126
1da177e4
LT
1127 if (neigh)
1128 neigh_hold(neigh);
14deae41 1129 else {
f83c7790 1130 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
b43faac6 1131 if (IS_ERR(neigh)) {
252c3d84 1132 in6_dev_put(idev);
b43faac6
DM
1133 dst_free(&rt->dst);
1134 return ERR_CAST(neigh);
1135 }
14deae41 1136 }
1da177e4 1137
8e2ec639
YZ
1138 rt->dst.flags |= DST_HOST;
1139 rt->dst.output = ip6_output;
69cce1d1 1140 dst_set_neighbour(&rt->dst, neigh);
d8d1f30b 1141 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1142 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1143 rt->rt6i_dst.plen = 128;
1144 rt->rt6i_idev = idev;
7011687f 1145 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1146
3b00944c 1147 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1148 rt->dst.next = icmp6_dst_gc_list;
1149 icmp6_dst_gc_list = &rt->dst;
3b00944c 1150 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1151
5578689a 1152 fib6_force_start_gc(net);
1da177e4 1153
87a11578
DM
1154 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1155
1da177e4 1156out:
87a11578 1157 return dst;
1da177e4
LT
1158}
1159
3d0f24a7 1160int icmp6_dst_gc(void)
1da177e4 1161{
e9476e95 1162 struct dst_entry *dst, **pprev;
3d0f24a7 1163 int more = 0;
1da177e4 1164
3b00944c
YH
1165 spin_lock_bh(&icmp6_dst_lock);
1166 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1167
1da177e4
LT
1168 while ((dst = *pprev) != NULL) {
1169 if (!atomic_read(&dst->__refcnt)) {
1170 *pprev = dst->next;
1171 dst_free(dst);
1da177e4
LT
1172 } else {
1173 pprev = &dst->next;
3d0f24a7 1174 ++more;
1da177e4
LT
1175 }
1176 }
1177
3b00944c 1178 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1179
3d0f24a7 1180 return more;
1da177e4
LT
1181}
1182
1e493d19
DM
1183static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1184 void *arg)
1185{
1186 struct dst_entry *dst, **pprev;
1187
1188 spin_lock_bh(&icmp6_dst_lock);
1189 pprev = &icmp6_dst_gc_list;
1190 while ((dst = *pprev) != NULL) {
1191 struct rt6_info *rt = (struct rt6_info *) dst;
1192 if (func(rt, arg)) {
1193 *pprev = dst->next;
1194 dst_free(dst);
1195 } else {
1196 pprev = &dst->next;
1197 }
1198 }
1199 spin_unlock_bh(&icmp6_dst_lock);
1200}
1201
569d3645 1202static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1203{
1da177e4 1204 unsigned long now = jiffies;
86393e52 1205 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1206 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1207 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1208 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1209 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1210 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1211 int entries;
7019b78e 1212
fc66f95c 1213 entries = dst_entries_get_fast(ops);
7019b78e 1214 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1215 entries <= rt_max_size)
1da177e4
LT
1216 goto out;
1217
6891a346
BT
1218 net->ipv6.ip6_rt_gc_expire++;
1219 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1220 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1221 entries = dst_entries_get_slow(ops);
1222 if (entries < ops->gc_thresh)
7019b78e 1223 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1224out:
7019b78e 1225 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1226 return entries > rt_max_size;
1da177e4
LT
1227}
1228
1229/* Clean host part of a prefix. Not necessary in radix tree,
1230 but results in cleaner routing tables.
1231
1232 Remove it only when all the things will work!
1233 */
1234
6b75d090 1235int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1236{
5170ae82 1237 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1238 if (hoplimit == 0) {
6b75d090 1239 struct net_device *dev = dst->dev;
c68f24cc
ED
1240 struct inet6_dev *idev;
1241
1242 rcu_read_lock();
1243 idev = __in6_dev_get(dev);
1244 if (idev)
6b75d090 1245 hoplimit = idev->cnf.hop_limit;
c68f24cc 1246 else
53b7997f 1247 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1248 rcu_read_unlock();
1da177e4
LT
1249 }
1250 return hoplimit;
1251}
abbf46ae 1252EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1253
1254/*
1255 *
1256 */
1257
86872cb5 1258int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1259{
1260 int err;
5578689a 1261 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1262 struct rt6_info *rt = NULL;
1263 struct net_device *dev = NULL;
1264 struct inet6_dev *idev = NULL;
c71099ac 1265 struct fib6_table *table;
1da177e4
LT
1266 int addr_type;
1267
86872cb5 1268 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1269 return -EINVAL;
1270#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1271 if (cfg->fc_src_len)
1da177e4
LT
1272 return -EINVAL;
1273#endif
86872cb5 1274 if (cfg->fc_ifindex) {
1da177e4 1275 err = -ENODEV;
5578689a 1276 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1277 if (!dev)
1278 goto out;
1279 idev = in6_dev_get(dev);
1280 if (!idev)
1281 goto out;
1282 }
1283
86872cb5
TG
1284 if (cfg->fc_metric == 0)
1285 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1286
d71314b4 1287 err = -ENOBUFS;
38308473
DM
1288 if (cfg->fc_nlinfo.nlh &&
1289 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1290 table = fib6_get_table(net, cfg->fc_table);
38308473 1291 if (!table) {
f3213831 1292 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1293 table = fib6_new_table(net, cfg->fc_table);
1294 }
1295 } else {
1296 table = fib6_new_table(net, cfg->fc_table);
1297 }
38308473
DM
1298
1299 if (!table)
c71099ac 1300 goto out;
c71099ac 1301
8b96d22d 1302 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1da177e4 1303
38308473 1304 if (!rt) {
1da177e4
LT
1305 err = -ENOMEM;
1306 goto out;
1307 }
1308
d8d1f30b 1309 rt->dst.obsolete = -1;
1716a961
G
1310
1311 if (cfg->fc_flags & RTF_EXPIRES)
1312 rt6_set_expires(rt, jiffies +
1313 clock_t_to_jiffies(cfg->fc_expires));
1314 else
1315 rt6_clean_expires(rt);
1da177e4 1316
86872cb5
TG
1317 if (cfg->fc_protocol == RTPROT_UNSPEC)
1318 cfg->fc_protocol = RTPROT_BOOT;
1319 rt->rt6i_protocol = cfg->fc_protocol;
1320
1321 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1322
1323 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1324 rt->dst.input = ip6_mc_input;
ab79ad14
1325 else if (cfg->fc_flags & RTF_LOCAL)
1326 rt->dst.input = ip6_input;
1da177e4 1327 else
d8d1f30b 1328 rt->dst.input = ip6_forward;
1da177e4 1329
d8d1f30b 1330 rt->dst.output = ip6_output;
1da177e4 1331
86872cb5
TG
1332 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1333 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1334 if (rt->rt6i_dst.plen == 128)
11d53b49 1335 rt->dst.flags |= DST_HOST;
1da177e4 1336
8e2ec639
YZ
1337 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1338 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1339 if (!metrics) {
1340 err = -ENOMEM;
1341 goto out;
1342 }
1343 dst_init_metrics(&rt->dst, metrics, 0);
1344 }
1da177e4 1345#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1346 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1347 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1348#endif
1349
86872cb5 1350 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1351
1352 /* We cannot add true routes via loopback here,
1353 they would result in kernel looping; promote them to reject routes
1354 */
86872cb5 1355 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1356 (dev && (dev->flags & IFF_LOOPBACK) &&
1357 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1358 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1359 /* hold loopback dev/idev if we haven't done so. */
5578689a 1360 if (dev != net->loopback_dev) {
1da177e4
LT
1361 if (dev) {
1362 dev_put(dev);
1363 in6_dev_put(idev);
1364 }
5578689a 1365 dev = net->loopback_dev;
1da177e4
LT
1366 dev_hold(dev);
1367 idev = in6_dev_get(dev);
1368 if (!idev) {
1369 err = -ENODEV;
1370 goto out;
1371 }
1372 }
d8d1f30b
CG
1373 rt->dst.output = ip6_pkt_discard_out;
1374 rt->dst.input = ip6_pkt_discard;
1375 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1376 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1377 goto install_route;
1378 }
1379
86872cb5 1380 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1381 const struct in6_addr *gw_addr;
1da177e4
LT
1382 int gwa_type;
1383
86872cb5 1384 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1385 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1386 gwa_type = ipv6_addr_type(gw_addr);
1387
1388 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1389 struct rt6_info *grt;
1390
1391 /* IPv6 strictly inhibits using not link-local
1392 addresses as nexthop address.
1393 Otherwise, router will not able to send redirects.
1394 It is very good, but in some (rare!) circumstances
1395 (SIT, PtP, NBMA NOARP links) it is handy to allow
1396 some exceptions. --ANK
1397 */
1398 err = -EINVAL;
38308473 1399 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1400 goto out;
1401
5578689a 1402 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1403
1404 err = -EHOSTUNREACH;
38308473 1405 if (!grt)
1da177e4
LT
1406 goto out;
1407 if (dev) {
d1918542 1408 if (dev != grt->dst.dev) {
d8d1f30b 1409 dst_release(&grt->dst);
1da177e4
LT
1410 goto out;
1411 }
1412 } else {
d1918542 1413 dev = grt->dst.dev;
1da177e4
LT
1414 idev = grt->rt6i_idev;
1415 dev_hold(dev);
1416 in6_dev_hold(grt->rt6i_idev);
1417 }
38308473 1418 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1419 err = 0;
d8d1f30b 1420 dst_release(&grt->dst);
1da177e4
LT
1421
1422 if (err)
1423 goto out;
1424 }
1425 err = -EINVAL;
38308473 1426 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1427 goto out;
1428 }
1429
1430 err = -ENODEV;
38308473 1431 if (!dev)
1da177e4
LT
1432 goto out;
1433
c3968a85
DW
1434 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1435 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1436 err = -EINVAL;
1437 goto out;
1438 }
4e3fd7a0 1439 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1440 rt->rt6i_prefsrc.plen = 128;
1441 } else
1442 rt->rt6i_prefsrc.plen = 0;
1443
86872cb5 1444 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
8ade06c6 1445 err = rt6_bind_neighbour(rt, dev);
f83c7790 1446 if (err)
1da177e4 1447 goto out;
1da177e4
LT
1448 }
1449
86872cb5 1450 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1451
1452install_route:
86872cb5
TG
1453 if (cfg->fc_mx) {
1454 struct nlattr *nla;
1455 int remaining;
1456
1457 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1458 int type = nla_type(nla);
86872cb5
TG
1459
1460 if (type) {
1461 if (type > RTAX_MAX) {
1da177e4
LT
1462 err = -EINVAL;
1463 goto out;
1464 }
86872cb5 1465
defb3519 1466 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1467 }
1da177e4
LT
1468 }
1469 }
1470
d8d1f30b 1471 rt->dst.dev = dev;
1da177e4 1472 rt->rt6i_idev = idev;
c71099ac 1473 rt->rt6i_table = table;
63152fc0 1474
c346dca1 1475 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1476
86872cb5 1477 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1478
1479out:
1480 if (dev)
1481 dev_put(dev);
1482 if (idev)
1483 in6_dev_put(idev);
1484 if (rt)
d8d1f30b 1485 dst_free(&rt->dst);
1da177e4
LT
1486 return err;
1487}
1488
86872cb5 1489static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1490{
1491 int err;
c71099ac 1492 struct fib6_table *table;
d1918542 1493 struct net *net = dev_net(rt->dst.dev);
1da177e4 1494
8ed67789 1495 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1496 return -ENOENT;
1497
c71099ac
TG
1498 table = rt->rt6i_table;
1499 write_lock_bh(&table->tb6_lock);
1da177e4 1500
86872cb5 1501 err = fib6_del(rt, info);
d8d1f30b 1502 dst_release(&rt->dst);
1da177e4 1503
c71099ac 1504 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1505
1506 return err;
1507}
1508
e0a1ad73
TG
1509int ip6_del_rt(struct rt6_info *rt)
1510{
4d1169c1 1511 struct nl_info info = {
d1918542 1512 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1513 };
528c4ceb 1514 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1515}
1516
86872cb5 1517static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1518{
c71099ac 1519 struct fib6_table *table;
1da177e4
LT
1520 struct fib6_node *fn;
1521 struct rt6_info *rt;
1522 int err = -ESRCH;
1523
5578689a 1524 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1525 if (!table)
c71099ac
TG
1526 return err;
1527
1528 read_lock_bh(&table->tb6_lock);
1da177e4 1529
c71099ac 1530 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1531 &cfg->fc_dst, cfg->fc_dst_len,
1532 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1533
1da177e4 1534 if (fn) {
d8d1f30b 1535 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1536 if (cfg->fc_ifindex &&
d1918542
DM
1537 (!rt->dst.dev ||
1538 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1539 continue;
86872cb5
TG
1540 if (cfg->fc_flags & RTF_GATEWAY &&
1541 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1542 continue;
86872cb5 1543 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1544 continue;
d8d1f30b 1545 dst_hold(&rt->dst);
c71099ac 1546 read_unlock_bh(&table->tb6_lock);
1da177e4 1547
86872cb5 1548 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1549 }
1550 }
c71099ac 1551 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1552
1553 return err;
1554}
1555
1556/*
1557 * Handle redirects
1558 */
/*
 * Flow key for redirect lookups: a flowi6 extended with the address of the
 * router that sent the redirect.  fl6 must stay the first member, because
 * __ip6_route_redirect() casts the flowi6 pointer it receives back to
 * struct ip6rd_flowi.
 */
a6279458 1559struct ip6rd_flowi {
4c9483b2 1560 struct flowi6 fl6;
a6279458
YH
1561 struct in6_addr gateway;
1562};
1563
8ed67789
DL
1564static struct rt6_info *__ip6_route_redirect(struct net *net,
1565 struct fib6_table *table,
4c9483b2 1566 struct flowi6 *fl6,
a6279458 1567 int flags)
1da177e4 1568{
4c9483b2 1569 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1570 struct rt6_info *rt;
e843b9e1 1571 struct fib6_node *fn;
c71099ac 1572
1da177e4 1573 /*
e843b9e1
YH
1574 * Get the "current" route for this destination and
1575 * check if the redirect has come from approriate router.
1576 *
1577 * RFC 2461 specifies that redirects should only be
1578 * accepted if they come from the nexthop to the target.
1579 * Due to the way the routes are chosen, this notion
1580 * is a bit fuzzy and one might need to check all possible
1581 * routes.
1da177e4 1582 */
1da177e4 1583
c71099ac 1584 read_lock_bh(&table->tb6_lock);
4c9483b2 1585 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1586restart:
d8d1f30b 1587 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1588 /*
1589 * Current route is on-link; redirect is always invalid.
1590 *
1591 * Seems, previous statement is not true. It could
1592 * be node, which looks for us as on-link (f.e. proxy ndisc)
1593 * But then router serving it might decide, that we should
1594 * know truth 8)8) --ANK (980726).
1595 */
1596 if (rt6_check_expired(rt))
1597 continue;
1598 if (!(rt->rt6i_flags & RTF_GATEWAY))
1599 continue;
d1918542 1600 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
e843b9e1 1601 continue;
a6279458 1602 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1603 continue;
1604 break;
1605 }
a6279458 1606
cb15d9c2 1607 if (!rt)
8ed67789 1608 rt = net->ipv6.ip6_null_entry;
4c9483b2 1609 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1610out:
d8d1f30b 1611 dst_hold(&rt->dst);
a6279458 1612
c71099ac 1613 read_unlock_bh(&table->tb6_lock);
e843b9e1 1614
a6279458
YH
1615 return rt;
1616};
1617
b71d1d42
ED
1618static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1619 const struct in6_addr *src,
1620 const struct in6_addr *gateway,
a6279458
YH
1621 struct net_device *dev)
1622{
adaa70bb 1623 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1624 struct net *net = dev_net(dev);
a6279458 1625 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1626 .fl6 = {
1627 .flowi6_oif = dev->ifindex,
1628 .daddr = *dest,
1629 .saddr = *src,
a6279458 1630 },
a6279458 1631 };
adaa70bb 1632
4e3fd7a0 1633 rdfl.gateway = *gateway;
86c36ce4 1634
adaa70bb
TG
1635 if (rt6_need_strict(dest))
1636 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1637
4c9483b2 1638 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1639 flags, __ip6_route_redirect);
a6279458
YH
1640}
1641
b71d1d42
ED
1642void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1643 const struct in6_addr *saddr,
a6279458
YH
1644 struct neighbour *neigh, u8 *lladdr, int on_link)
1645{
1646 struct rt6_info *rt, *nrt = NULL;
1647 struct netevent_redirect netevent;
c346dca1 1648 struct net *net = dev_net(neigh->dev);
a6279458
YH
1649
1650 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1651
8ed67789 1652 if (rt == net->ipv6.ip6_null_entry) {
e87cc472 1653 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
a6279458 1654 goto out;
1da177e4
LT
1655 }
1656
1da177e4
LT
1657 /*
1658 * We have finally decided to accept it.
1659 */
1660
1ab1457c 1661 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1662 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1663 NEIGH_UPDATE_F_OVERRIDE|
1664 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1665 NEIGH_UPDATE_F_ISROUTER))
1666 );
1667
1668 /*
1669 * Redirect received -> path was valid.
1670 * Look, redirects are sent only in response to data packets,
1671 * so that this nexthop apparently is reachable. --ANK
1672 */
d8d1f30b 1673 dst_confirm(&rt->dst);
1da177e4
LT
1674
1675 /* Duplicate redirect: silently ignore. */
27217455 1676 if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1da177e4
LT
1677 goto out;
1678
21efcfa0 1679 nrt = ip6_rt_copy(rt, dest);
38308473 1680 if (!nrt)
1da177e4
LT
1681 goto out;
1682
1683 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1684 if (on_link)
1685 nrt->rt6i_flags &= ~RTF_GATEWAY;
1686
4e3fd7a0 1687 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
69cce1d1 1688 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1da177e4 1689
40e22e8f 1690 if (ip6_ins_rt(nrt))
1da177e4
LT
1691 goto out;
1692
d8d1f30b
CG
1693 netevent.old = &rt->dst;
1694 netevent.new = &nrt->dst;
8d71740c
TT
1695 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1696
38308473 1697 if (rt->rt6i_flags & RTF_CACHE) {
e0a1ad73 1698 ip6_del_rt(rt);
1da177e4
LT
1699 return;
1700 }
1701
1702out:
d8d1f30b 1703 dst_release(&rt->dst);
1da177e4
LT
1704}
1705
1706/*
1707 * Handle ICMP "packet too big" messages
1708 * i.e. Path MTU discovery
1709 */
1710
b71d1d42 1711static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2 1712 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1713{
1714 struct rt6_info *rt, *nrt;
1715 int allfrag = 0;
d3052b55 1716again:
ae878ae2 1717 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
38308473 1718 if (!rt)
1da177e4
LT
1719 return;
1720
d3052b55
AV
1721 if (rt6_check_expired(rt)) {
1722 ip6_del_rt(rt);
1723 goto again;
1724 }
1725
d8d1f30b 1726 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1727 goto out;
1728
1729 if (pmtu < IPV6_MIN_MTU) {
1730 /*
1ab1457c 1731 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1732 * MTU (1280) and a fragment header should always be included
1733 * after a node receiving Too Big message reporting PMTU is
1734 * less than the IPv6 Minimum Link MTU.
1735 */
1736 pmtu = IPV6_MIN_MTU;
1737 allfrag = 1;
1738 }
1739
1740 /* New mtu received -> path was valid.
1741 They are sent only in response to data packets,
1742 so that this nexthop apparently is reachable. --ANK
1743 */
d8d1f30b 1744 dst_confirm(&rt->dst);
1da177e4
LT
1745
1746 /* Host route. If it is static, it would be better
1747 not to override it, but add new one, so that
1748 when cache entry will expire old pmtu
1749 would return automatically.
1750 */
1751 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1752 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1753 if (allfrag) {
1754 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1755 features |= RTAX_FEATURE_ALLFRAG;
1756 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1757 }
1716a961
G
1758 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1759 rt->rt6i_flags |= RTF_MODIFIED;
1da177e4
LT
1760 goto out;
1761 }
1762
1763 /* Network route.
1764 Two cases are possible:
1765 1. It is connected route. Action: COW
1766 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1767 */
27217455 1768 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1769 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1770 else
1771 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1772
d5315b50 1773 if (nrt) {
defb3519
DM
1774 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1775 if (allfrag) {
1776 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1777 features |= RTAX_FEATURE_ALLFRAG;
1778 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1779 }
a1e78363
YH
1780
1781 /* According to RFC 1981, detecting PMTU increase shouldn't be
1782 * happened within 5 mins, the recommended timer is 10 mins.
1783 * Here this route expiration time is set to ip6_rt_mtu_expires
1784 * which is 10 mins. After 10 mins the decreased pmtu is expired
1785 * and detecting PMTU increase will be automatically happened.
1786 */
1716a961
G
1787 rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1788 nrt->rt6i_flags |= RTF_DYNAMIC;
40e22e8f 1789 ip6_ins_rt(nrt);
1da177e4 1790 }
1da177e4 1791out:
d8d1f30b 1792 dst_release(&rt->dst);
1da177e4
LT
1793}
1794
b71d1d42 1795void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2
1796 struct net_device *dev, u32 pmtu)
1797{
1798 struct net *net = dev_net(dev);
1799
1800 /*
1801 * RFC 1981 states that a node "MUST reduce the size of the packets it
1802 * is sending along the path" that caused the Packet Too Big message.
1803 * Since it's not possible in the general case to determine which
1804 * interface was used to send the original packet, we update the MTU
1805 * on the interface that will be used to send future packets. We also
1806 * update the MTU on the interface that received the Packet Too Big in
1807 * case the original packet was forced out that interface with
1808 * SO_BINDTODEVICE or similar. This is the next best thing to the
1809 * correct behaviour, which would be to update the MTU on all
1810 * interfaces.
1811 */
1812 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1813 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1814}
1815
1da177e4
LT
1816/*
1817 * Misc support functions
1818 */
1819
1716a961 1820static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 1821 const struct in6_addr *dest)
1da177e4 1822{
d1918542 1823 struct net *net = dev_net(ort->dst.dev);
8b96d22d
DM
1824 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1825 ort->rt6i_table);
1da177e4
LT
1826
1827 if (rt) {
d8d1f30b
CG
1828 rt->dst.input = ort->dst.input;
1829 rt->dst.output = ort->dst.output;
8e2ec639 1830 rt->dst.flags |= DST_HOST;
d8d1f30b 1831
4e3fd7a0 1832 rt->rt6i_dst.addr = *dest;
8e2ec639 1833 rt->rt6i_dst.plen = 128;
defb3519 1834 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1835 rt->dst.error = ort->dst.error;
1da177e4
LT
1836 rt->rt6i_idev = ort->rt6i_idev;
1837 if (rt->rt6i_idev)
1838 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1839 rt->dst.lastuse = jiffies;
1da177e4 1840
4e3fd7a0 1841 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1842 rt->rt6i_flags = ort->rt6i_flags;
1843 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1844 (RTF_DEFAULT | RTF_ADDRCONF))
1845 rt6_set_from(rt, ort);
1846 else
1847 rt6_clean_expires(rt);
1da177e4
LT
1848 rt->rt6i_metric = 0;
1849
1da177e4
LT
1850#ifdef CONFIG_IPV6_SUBTREES
1851 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1852#endif
0f6c6392 1853 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1854 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1855 }
1856 return rt;
1857}
1858
70ceb4f5 1859#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1860static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1861 const struct in6_addr *prefix, int prefixlen,
1862 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1863{
1864 struct fib6_node *fn;
1865 struct rt6_info *rt = NULL;
c71099ac
TG
1866 struct fib6_table *table;
1867
efa2cea0 1868 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1869 if (!table)
c71099ac 1870 return NULL;
70ceb4f5 1871
c71099ac
TG
1872 write_lock_bh(&table->tb6_lock);
1873 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1874 if (!fn)
1875 goto out;
1876
d8d1f30b 1877 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1878 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1879 continue;
1880 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1881 continue;
1882 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1883 continue;
d8d1f30b 1884 dst_hold(&rt->dst);
70ceb4f5
YH
1885 break;
1886 }
1887out:
c71099ac 1888 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1889 return rt;
1890}
1891
efa2cea0 1892static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1893 const struct in6_addr *prefix, int prefixlen,
1894 const struct in6_addr *gwaddr, int ifindex,
95c96174 1895 unsigned int pref)
70ceb4f5 1896{
86872cb5
TG
1897 struct fib6_config cfg = {
1898 .fc_table = RT6_TABLE_INFO,
238fc7ea 1899 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1900 .fc_ifindex = ifindex,
1901 .fc_dst_len = prefixlen,
1902 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1903 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1904 .fc_nlinfo.pid = 0,
1905 .fc_nlinfo.nlh = NULL,
1906 .fc_nlinfo.nl_net = net,
86872cb5
TG
1907 };
1908
4e3fd7a0
AD
1909 cfg.fc_dst = *prefix;
1910 cfg.fc_gateway = *gwaddr;
70ceb4f5 1911
e317da96
YH
1912 /* We should treat it as a default route if prefix length is 0. */
1913 if (!prefixlen)
86872cb5 1914 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1915
86872cb5 1916 ip6_route_add(&cfg);
70ceb4f5 1917
efa2cea0 1918 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1919}
1920#endif
1921
b71d1d42 1922struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1923{
1da177e4 1924 struct rt6_info *rt;
c71099ac 1925 struct fib6_table *table;
1da177e4 1926
c346dca1 1927 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1928 if (!table)
c71099ac 1929 return NULL;
1da177e4 1930
c71099ac 1931 write_lock_bh(&table->tb6_lock);
d8d1f30b 1932 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
d1918542 1933 if (dev == rt->dst.dev &&
045927ff 1934 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1935 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1936 break;
1937 }
1938 if (rt)
d8d1f30b 1939 dst_hold(&rt->dst);
c71099ac 1940 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1941 return rt;
1942}
1943
b71d1d42 1944struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1945 struct net_device *dev,
1946 unsigned int pref)
1da177e4 1947{
86872cb5
TG
1948 struct fib6_config cfg = {
1949 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1950 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1951 .fc_ifindex = dev->ifindex,
1952 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1953 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1954 .fc_nlinfo.pid = 0,
1955 .fc_nlinfo.nlh = NULL,
c346dca1 1956 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1957 };
1da177e4 1958
4e3fd7a0 1959 cfg.fc_gateway = *gwaddr;
1da177e4 1960
86872cb5 1961 ip6_route_add(&cfg);
1da177e4 1962
1da177e4
LT
1963 return rt6_get_dflt_router(gwaddr, dev);
1964}
1965
7b4da532 1966void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1967{
1968 struct rt6_info *rt;
c71099ac
TG
1969 struct fib6_table *table;
1970
1971 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1972 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1973 if (!table)
c71099ac 1974 return;
1da177e4
LT
1975
1976restart:
c71099ac 1977 read_lock_bh(&table->tb6_lock);
d8d1f30b 1978 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1979 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1980 dst_hold(&rt->dst);
c71099ac 1981 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1982 ip6_del_rt(rt);
1da177e4
LT
1983 goto restart;
1984 }
1985 }
c71099ac 1986 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1987}
1988
5578689a
DL
1989static void rtmsg_to_fib6_config(struct net *net,
1990 struct in6_rtmsg *rtmsg,
86872cb5
TG
1991 struct fib6_config *cfg)
1992{
1993 memset(cfg, 0, sizeof(*cfg));
1994
1995 cfg->fc_table = RT6_TABLE_MAIN;
1996 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1997 cfg->fc_metric = rtmsg->rtmsg_metric;
1998 cfg->fc_expires = rtmsg->rtmsg_info;
1999 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2000 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2001 cfg->fc_flags = rtmsg->rtmsg_flags;
2002
5578689a 2003 cfg->fc_nlinfo.nl_net = net;
f1243c2d 2004
4e3fd7a0
AD
2005 cfg->fc_dst = rtmsg->rtmsg_dst;
2006 cfg->fc_src = rtmsg->rtmsg_src;
2007 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2008}
2009
5578689a 2010int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2011{
86872cb5 2012 struct fib6_config cfg;
1da177e4
LT
2013 struct in6_rtmsg rtmsg;
2014 int err;
2015
2016 switch(cmd) {
2017 case SIOCADDRT: /* Add a route */
2018 case SIOCDELRT: /* Delete a route */
2019 if (!capable(CAP_NET_ADMIN))
2020 return -EPERM;
2021 err = copy_from_user(&rtmsg, arg,
2022 sizeof(struct in6_rtmsg));
2023 if (err)
2024 return -EFAULT;
86872cb5 2025
5578689a 2026 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2027
1da177e4
LT
2028 rtnl_lock();
2029 switch (cmd) {
2030 case SIOCADDRT:
86872cb5 2031 err = ip6_route_add(&cfg);
1da177e4
LT
2032 break;
2033 case SIOCDELRT:
86872cb5 2034 err = ip6_route_del(&cfg);
1da177e4
LT
2035 break;
2036 default:
2037 err = -EINVAL;
2038 }
2039 rtnl_unlock();
2040
2041 return err;
3ff50b79 2042 }
1da177e4
LT
2043
2044 return -EINVAL;
2045}
2046
2047/*
2048 * Drop the packet on the floor
2049 */
2050
d5fdd6ba 2051static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2052{
612f09e8 2053 int type;
adf30907 2054 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2055 switch (ipstats_mib_noroutes) {
2056 case IPSTATS_MIB_INNOROUTES:
0660e03f 2057 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2058 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2059 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2060 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2061 break;
2062 }
2063 /* FALLTHROUGH */
2064 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2065 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2066 ipstats_mib_noroutes);
612f09e8
YH
2067 break;
2068 }
3ffe533c 2069 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2070 kfree_skb(skb);
2071 return 0;
2072}
2073
9ce8ade0
TG
2074static int ip6_pkt_discard(struct sk_buff *skb)
2075{
612f09e8 2076 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2077}
2078
20380731 2079static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2080{
adf30907 2081 skb->dev = skb_dst(skb)->dev;
612f09e8 2082 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2083}
2084
6723ab54
DM
2085#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2086
9ce8ade0
TG
2087static int ip6_pkt_prohibit(struct sk_buff *skb)
2088{
612f09e8 2089 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2090}
2091
2092static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2093{
adf30907 2094 skb->dev = skb_dst(skb)->dev;
612f09e8 2095 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2096}
2097
6723ab54
DM
2098#endif
2099
1da177e4
LT
2100/*
2101 * Allocate a dst for local (unicast / anycast) address.
2102 */
2103
2104struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2105 const struct in6_addr *addr,
8f031519 2106 bool anycast)
1da177e4 2107{
c346dca1 2108 struct net *net = dev_net(idev->dev);
8b96d22d 2109 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
f83c7790 2110 int err;
1da177e4 2111
38308473 2112 if (!rt) {
f3213831 2113 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
1da177e4 2114 return ERR_PTR(-ENOMEM);
40385653 2115 }
1da177e4 2116
1da177e4
LT
2117 in6_dev_hold(idev);
2118
11d53b49 2119 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2120 rt->dst.input = ip6_input;
2121 rt->dst.output = ip6_output;
1da177e4 2122 rt->rt6i_idev = idev;
d8d1f30b 2123 rt->dst.obsolete = -1;
1da177e4
LT
2124
2125 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2126 if (anycast)
2127 rt->rt6i_flags |= RTF_ANYCAST;
2128 else
1da177e4 2129 rt->rt6i_flags |= RTF_LOCAL;
8ade06c6 2130 err = rt6_bind_neighbour(rt, rt->dst.dev);
f83c7790 2131 if (err) {
d8d1f30b 2132 dst_free(&rt->dst);
f83c7790 2133 return ERR_PTR(err);
1da177e4
LT
2134 }
2135
4e3fd7a0 2136 rt->rt6i_dst.addr = *addr;
1da177e4 2137 rt->rt6i_dst.plen = 128;
5578689a 2138 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2139
d8d1f30b 2140 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2141
2142 return rt;
2143}
2144
c3968a85
DW
2145int ip6_route_get_saddr(struct net *net,
2146 struct rt6_info *rt,
b71d1d42 2147 const struct in6_addr *daddr,
c3968a85
DW
2148 unsigned int prefs,
2149 struct in6_addr *saddr)
2150{
2151 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2152 int err = 0;
2153 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2154 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2155 else
2156 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2157 daddr, prefs, saddr);
2158 return err;
2159}
2160
/*
 * Walker argument used to remove a deleted address from routes' prefsrc
 * entries (see fib6_remove_prefsrc()).
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any */
	struct net *net;		/* namespace owning ip6_null_entry */
	struct in6_addr *addr;		/* address being removed */
};
2167
2168static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2169{
2170 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2171 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2172 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2173
d1918542 2174 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2175 rt != net->ipv6.ip6_null_entry &&
2176 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2177 /* remove prefsrc entry */
2178 rt->rt6i_prefsrc.plen = 0;
2179 }
2180 return 0;
2181}
2182
2183void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2184{
2185 struct net *net = dev_net(ifp->idev->dev);
2186 struct arg_dev_net_ip adni = {
2187 .dev = ifp->idev->dev,
2188 .net = net,
2189 .addr = &ifp->addr,
2190 };
2191 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2192}
2193
/* Walker argument for fib6_ifdown(): device filter plus its namespace. */
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL matches any */
	struct net *net;		/* namespace owning ip6_null_entry */
};
2198
1da177e4
LT
2199static int fib6_ifdown(struct rt6_info *rt, void *arg)
2200{
bc3ef660 2201 const struct arg_dev_net *adn = arg;
2202 const struct net_device *dev = adn->dev;
8ed67789 2203
d1918542 2204 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2205 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2206 return -1;
c159d30c 2207
1da177e4
LT
2208 return 0;
2209}
2210
f3db4851 2211void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2212{
8ed67789
DL
2213 struct arg_dev_net adn = {
2214 .dev = dev,
2215 .net = net,
2216 };
2217
2218 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2219 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2220}
2221
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2226
2227static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2228{
2229 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2230 struct inet6_dev *idev;
2231
2232 /* In IPv6 pmtu discovery is not optional,
2233 so that RTAX_MTU lock cannot disable it.
2234 We still use this lock to block changes
2235 caused by addrconf/ndisc.
2236 */
2237
2238 idev = __in6_dev_get(arg->dev);
38308473 2239 if (!idev)
1da177e4
LT
2240 return 0;
2241
2242 /* For administrative MTU increase, there is no way to discover
2243 IPv6 PMTU increase, so PMTU increase should be updated here.
2244 Since RFC 1981 doesn't include administrative MTU increase
2245 update PMTU increase is a MUST. (i.e. jumbo frame)
2246 */
2247 /*
2248 If new MTU is less than route PMTU, this new MTU will be the
2249 lowest MTU in the path, update the route PMTU to reflect PMTU
2250 decreases; if new MTU is greater than route PMTU, and the
2251 old MTU is the lowest MTU in the path, update the route PMTU
2252 to reflect the increase. In this case if the other nodes' MTU
2253 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2254 PMTU discouvery.
2255 */
d1918542 2256 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2257 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2258 (dst_mtu(&rt->dst) >= arg->mtu ||
2259 (dst_mtu(&rt->dst) < arg->mtu &&
2260 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2261 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2262 }
1da177e4
LT
2263 return 0;
2264}
2265
95c96174 2266void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2267{
c71099ac
TG
2268 struct rt6_mtu_change_arg arg = {
2269 .dev = dev,
2270 .mtu = mtu,
2271 };
1da177e4 2272
c346dca1 2273 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2274}
2275
ef7c79ed 2276static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2277 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2278 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2279 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2280 [RTA_PRIORITY] = { .type = NLA_U32 },
2281 [RTA_METRICS] = { .type = NLA_NESTED },
2282};
2283
2284static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2285 struct fib6_config *cfg)
1da177e4 2286{
86872cb5
TG
2287 struct rtmsg *rtm;
2288 struct nlattr *tb[RTA_MAX+1];
2289 int err;
1da177e4 2290
86872cb5
TG
2291 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2292 if (err < 0)
2293 goto errout;
1da177e4 2294
86872cb5
TG
2295 err = -EINVAL;
2296 rtm = nlmsg_data(nlh);
2297 memset(cfg, 0, sizeof(*cfg));
2298
2299 cfg->fc_table = rtm->rtm_table;
2300 cfg->fc_dst_len = rtm->rtm_dst_len;
2301 cfg->fc_src_len = rtm->rtm_src_len;
2302 cfg->fc_flags = RTF_UP;
2303 cfg->fc_protocol = rtm->rtm_protocol;
2304
2305 if (rtm->rtm_type == RTN_UNREACHABLE)
2306 cfg->fc_flags |= RTF_REJECT;
2307
ab79ad14
2308 if (rtm->rtm_type == RTN_LOCAL)
2309 cfg->fc_flags |= RTF_LOCAL;
2310
86872cb5
TG
2311 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2312 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2313 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2314
2315 if (tb[RTA_GATEWAY]) {
2316 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2317 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2318 }
86872cb5
TG
2319
2320 if (tb[RTA_DST]) {
2321 int plen = (rtm->rtm_dst_len + 7) >> 3;
2322
2323 if (nla_len(tb[RTA_DST]) < plen)
2324 goto errout;
2325
2326 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2327 }
86872cb5
TG
2328
2329 if (tb[RTA_SRC]) {
2330 int plen = (rtm->rtm_src_len + 7) >> 3;
2331
2332 if (nla_len(tb[RTA_SRC]) < plen)
2333 goto errout;
2334
2335 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2336 }
86872cb5 2337
c3968a85
DW
2338 if (tb[RTA_PREFSRC])
2339 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2340
86872cb5
TG
2341 if (tb[RTA_OIF])
2342 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2343
2344 if (tb[RTA_PRIORITY])
2345 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2346
2347 if (tb[RTA_METRICS]) {
2348 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2349 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2350 }
86872cb5
TG
2351
2352 if (tb[RTA_TABLE])
2353 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2354
2355 err = 0;
2356errout:
2357 return err;
1da177e4
LT
2358}
2359
c127ea2c 2360static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2361{
86872cb5
TG
2362 struct fib6_config cfg;
2363 int err;
1da177e4 2364
86872cb5
TG
2365 err = rtm_to_fib6_config(skb, nlh, &cfg);
2366 if (err < 0)
2367 return err;
2368
2369 return ip6_route_del(&cfg);
1da177e4
LT
2370}
2371
c127ea2c 2372static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2373{
86872cb5
TG
2374 struct fib6_config cfg;
2375 int err;
1da177e4 2376
86872cb5
TG
2377 err = rtm_to_fib6_config(skb, nlh, &cfg);
2378 if (err < 0)
2379 return err;
2380
2381 return ip6_route_add(&cfg);
1da177e4
LT
2382}
2383
339bf98f
TG
2384static inline size_t rt6_nlmsg_size(void)
2385{
2386 return NLMSG_ALIGN(sizeof(struct rtmsg))
2387 + nla_total_size(16) /* RTA_SRC */
2388 + nla_total_size(16) /* RTA_DST */
2389 + nla_total_size(16) /* RTA_GATEWAY */
2390 + nla_total_size(16) /* RTA_PREFSRC */
2391 + nla_total_size(4) /* RTA_TABLE */
2392 + nla_total_size(4) /* RTA_IIF */
2393 + nla_total_size(4) /* RTA_OIF */
2394 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2395 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2396 + nla_total_size(sizeof(struct rta_cacheinfo));
2397}
2398
191cd582
BH
2399static int rt6_fill_node(struct net *net,
2400 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2401 struct in6_addr *dst, struct in6_addr *src,
2402 int iif, int type, u32 pid, u32 seq,
7bc570c8 2403 int prefix, int nowait, unsigned int flags)
1da177e4 2404{
346f870b 2405 const struct inet_peer *peer;
1da177e4 2406 struct rtmsg *rtm;
2d7202bf 2407 struct nlmsghdr *nlh;
e3703b3d 2408 long expires;
9e762a4a 2409 u32 table;
f2c31e32 2410 struct neighbour *n;
346f870b 2411 u32 ts, tsage;
1da177e4
LT
2412
2413 if (prefix) { /* user wants prefix routes only */
2414 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2415 /* success since this is not a prefix route */
2416 return 1;
2417 }
2418 }
2419
2d7202bf 2420 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
38308473 2421 if (!nlh)
26932566 2422 return -EMSGSIZE;
2d7202bf
TG
2423
2424 rtm = nlmsg_data(nlh);
1da177e4
LT
2425 rtm->rtm_family = AF_INET6;
2426 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2427 rtm->rtm_src_len = rt->rt6i_src.plen;
2428 rtm->rtm_tos = 0;
c71099ac 2429 if (rt->rt6i_table)
9e762a4a 2430 table = rt->rt6i_table->tb6_id;
c71099ac 2431 else
9e762a4a
PM
2432 table = RT6_TABLE_UNSPEC;
2433 rtm->rtm_table = table;
c78679e8
DM
2434 if (nla_put_u32(skb, RTA_TABLE, table))
2435 goto nla_put_failure;
38308473 2436 if (rt->rt6i_flags & RTF_REJECT)
1da177e4 2437 rtm->rtm_type = RTN_UNREACHABLE;
38308473 2438 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2439 rtm->rtm_type = RTN_LOCAL;
d1918542 2440 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2441 rtm->rtm_type = RTN_LOCAL;
2442 else
2443 rtm->rtm_type = RTN_UNICAST;
2444 rtm->rtm_flags = 0;
2445 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2446 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2447 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4
LT
2448 rtm->rtm_protocol = RTPROT_REDIRECT;
2449 else if (rt->rt6i_flags & RTF_ADDRCONF)
2450 rtm->rtm_protocol = RTPROT_KERNEL;
38308473 2451 else if (rt->rt6i_flags & RTF_DEFAULT)
1da177e4
LT
2452 rtm->rtm_protocol = RTPROT_RA;
2453
38308473 2454 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2455 rtm->rtm_flags |= RTM_F_CLONED;
2456
2457 if (dst) {
c78679e8
DM
2458 if (nla_put(skb, RTA_DST, 16, dst))
2459 goto nla_put_failure;
1ab1457c 2460 rtm->rtm_dst_len = 128;
1da177e4 2461 } else if (rtm->rtm_dst_len)
c78679e8
DM
2462 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2463 goto nla_put_failure;
1da177e4
LT
2464#ifdef CONFIG_IPV6_SUBTREES
2465 if (src) {
c78679e8
DM
2466 if (nla_put(skb, RTA_SRC, 16, src))
2467 goto nla_put_failure;
1ab1457c 2468 rtm->rtm_src_len = 128;
c78679e8
DM
2469 } else if (rtm->rtm_src_len &&
2470 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2471 goto nla_put_failure;
1da177e4 2472#endif
7bc570c8
YH
2473 if (iif) {
2474#ifdef CONFIG_IPV6_MROUTE
2475 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2476 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2477 if (err <= 0) {
2478 if (!nowait) {
2479 if (err == 0)
2480 return 0;
2481 goto nla_put_failure;
2482 } else {
2483 if (err == -EMSGSIZE)
2484 goto nla_put_failure;
2485 }
2486 }
2487 } else
2488#endif
c78679e8
DM
2489 if (nla_put_u32(skb, RTA_IIF, iif))
2490 goto nla_put_failure;
7bc570c8 2491 } else if (dst) {
1da177e4 2492 struct in6_addr saddr_buf;
c78679e8
DM
2493 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2494 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2495 goto nla_put_failure;
1da177e4 2496 }
2d7202bf 2497
c3968a85
DW
2498 if (rt->rt6i_prefsrc.plen) {
2499 struct in6_addr saddr_buf;
4e3fd7a0 2500 saddr_buf = rt->rt6i_prefsrc.addr;
c78679e8
DM
2501 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2502 goto nla_put_failure;
c3968a85
DW
2503 }
2504
defb3519 2505 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2506 goto nla_put_failure;
2507
f2c31e32 2508 rcu_read_lock();
27217455 2509 n = dst_get_neighbour_noref(&rt->dst);
94f826b8
ED
2510 if (n) {
2511 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2512 rcu_read_unlock();
2513 goto nla_put_failure;
2514 }
2515 }
f2c31e32 2516 rcu_read_unlock();
2d7202bf 2517
c78679e8
DM
2518 if (rt->dst.dev &&
2519 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2520 goto nla_put_failure;
2521 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2522 goto nla_put_failure;
36e3deae
YH
2523 if (!(rt->rt6i_flags & RTF_EXPIRES))
2524 expires = 0;
d1918542
DM
2525 else if (rt->dst.expires - jiffies < INT_MAX)
2526 expires = rt->dst.expires - jiffies;
36e3deae
YH
2527 else
2528 expires = INT_MAX;
69cdf8f9 2529
97bab73f
DM
2530 peer = NULL;
2531 if (rt6_has_peer(rt))
2532 peer = rt6_peer_ptr(rt);
346f870b
DM
2533 ts = tsage = 0;
2534 if (peer && peer->tcp_ts_stamp) {
2535 ts = peer->tcp_ts;
2536 tsage = get_seconds() - peer->tcp_ts_stamp;
2537 }
2538
2539 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
d8d1f30b 2540 expires, rt->dst.error) < 0)
e3703b3d 2541 goto nla_put_failure;
2d7202bf
TG
2542
2543 return nlmsg_end(skb, nlh);
2544
2545nla_put_failure:
26932566
PM
2546 nlmsg_cancel(skb, nlh);
2547 return -EMSGSIZE;
1da177e4
LT
2548}
2549
1b43af54 2550int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2551{
2552 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2553 int prefix;
2554
2d7202bf
TG
2555 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2556 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2557 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2558 } else
2559 prefix = 0;
2560
191cd582
BH
2561 return rt6_fill_node(arg->net,
2562 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2563 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2564 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2565}
2566
c127ea2c 2567static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2568{
3b1e0a65 2569 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2570 struct nlattr *tb[RTA_MAX+1];
2571 struct rt6_info *rt;
1da177e4 2572 struct sk_buff *skb;
ab364a6f 2573 struct rtmsg *rtm;
4c9483b2 2574 struct flowi6 fl6;
72331bc0 2575 int err, iif = 0, oif = 0;
1da177e4 2576
ab364a6f
TG
2577 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2578 if (err < 0)
2579 goto errout;
1da177e4 2580
ab364a6f 2581 err = -EINVAL;
4c9483b2 2582 memset(&fl6, 0, sizeof(fl6));
1da177e4 2583
ab364a6f
TG
2584 if (tb[RTA_SRC]) {
2585 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2586 goto errout;
2587
4e3fd7a0 2588 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2589 }
2590
2591 if (tb[RTA_DST]) {
2592 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2593 goto errout;
2594
4e3fd7a0 2595 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2596 }
2597
2598 if (tb[RTA_IIF])
2599 iif = nla_get_u32(tb[RTA_IIF]);
2600
2601 if (tb[RTA_OIF])
72331bc0 2602 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2603
2604 if (iif) {
2605 struct net_device *dev;
72331bc0
SL
2606 int flags = 0;
2607
5578689a 2608 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2609 if (!dev) {
2610 err = -ENODEV;
ab364a6f 2611 goto errout;
1da177e4 2612 }
72331bc0
SL
2613
2614 fl6.flowi6_iif = iif;
2615
2616 if (!ipv6_addr_any(&fl6.saddr))
2617 flags |= RT6_LOOKUP_F_HAS_SADDR;
2618
2619 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2620 flags);
2621 } else {
2622 fl6.flowi6_oif = oif;
2623
2624 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
2625 }
2626
ab364a6f 2627 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2628 if (!skb) {
2173bff5 2629 dst_release(&rt->dst);
ab364a6f
TG
2630 err = -ENOBUFS;
2631 goto errout;
2632 }
1da177e4 2633
ab364a6f
TG
2634 /* Reserve room for dummy headers, this skb can pass
2635 through good chunk of routing engine.
2636 */
459a98ed 2637 skb_reset_mac_header(skb);
ab364a6f 2638 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2639
d8d1f30b 2640 skb_dst_set(skb, &rt->dst);
1da177e4 2641
4c9483b2 2642 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2643 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2644 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2645 if (err < 0) {
ab364a6f
TG
2646 kfree_skb(skb);
2647 goto errout;
1da177e4
LT
2648 }
2649
5578689a 2650 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2651errout:
1da177e4 2652 return err;
1da177e4
LT
2653}
2654
86872cb5 2655void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2656{
2657 struct sk_buff *skb;
5578689a 2658 struct net *net = info->nl_net;
528c4ceb
DL
2659 u32 seq;
2660 int err;
2661
2662 err = -ENOBUFS;
38308473 2663 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2664
339bf98f 2665 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2666 if (!skb)
21713ebc
TG
2667 goto errout;
2668
191cd582 2669 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2670 event, info->pid, seq, 0, 0, 0);
26932566
PM
2671 if (err < 0) {
2672 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2673 WARN_ON(err == -EMSGSIZE);
2674 kfree_skb(skb);
2675 goto errout;
2676 }
1ce85fe4
PNA
2677 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2678 info->nlh, gfp_any());
2679 return;
21713ebc
TG
2680errout:
2681 if (err < 0)
5578689a 2682 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2683}
2684
8ed67789
DL
2685static int ip6_route_dev_notify(struct notifier_block *this,
2686 unsigned long event, void *data)
2687{
2688 struct net_device *dev = (struct net_device *)data;
c346dca1 2689 struct net *net = dev_net(dev);
8ed67789
DL
2690
2691 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2692 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2693 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2694#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2695 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2696 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2697 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2698 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2699#endif
2700 }
2701
2702 return NOTIFY_OK;
2703}
2704
1da177e4
LT
2705/*
2706 * /proc
2707 */
2708
2709#ifdef CONFIG_PROC_FS
2710
/*
 * Buffer-cursor bookkeeping for /proc output.
 * NOTE(review): nothing in the visible part of this file references this
 * struct; the seq_file based handlers below do not use it.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2719
2720static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2721{
33120b30 2722 struct seq_file *m = p_arg;
69cce1d1 2723 struct neighbour *n;
1da177e4 2724
4b7a4274 2725 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2726
2727#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2728 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2729#else
33120b30 2730 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2731#endif
f2c31e32 2732 rcu_read_lock();
27217455 2733 n = dst_get_neighbour_noref(&rt->dst);
69cce1d1
DM
2734 if (n) {
2735 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2736 } else {
33120b30 2737 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2738 }
f2c31e32 2739 rcu_read_unlock();
33120b30 2740 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2741 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2742 rt->dst.__use, rt->rt6i_flags,
d1918542 2743 rt->dst.dev ? rt->dst.dev->name : "");
1da177e4
LT
2744 return 0;
2745}
2746
33120b30 2747static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2748{
f3db4851 2749 struct net *net = (struct net *)m->private;
32b293a5 2750 fib6_clean_all_ro(net, rt6_info_route, 0, m);
33120b30
AD
2751 return 0;
2752}
1da177e4 2753
33120b30
AD
2754static int ipv6_route_open(struct inode *inode, struct file *file)
2755{
de05c557 2756 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2757}
2758
33120b30
AD
2759static const struct file_operations ipv6_route_proc_fops = {
2760 .owner = THIS_MODULE,
2761 .open = ipv6_route_open,
2762 .read = seq_read,
2763 .llseek = seq_lseek,
b6fcbdb4 2764 .release = single_release_net,
33120b30
AD
2765};
2766
1da177e4
LT
2767static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2768{
69ddb805 2769 struct net *net = (struct net *)seq->private;
1da177e4 2770 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2771 net->ipv6.rt6_stats->fib_nodes,
2772 net->ipv6.rt6_stats->fib_route_nodes,
2773 net->ipv6.rt6_stats->fib_rt_alloc,
2774 net->ipv6.rt6_stats->fib_rt_entries,
2775 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2776 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2777 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2778
2779 return 0;
2780}
2781
2782static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2783{
de05c557 2784 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2785}
2786
9a32144e 2787static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2788 .owner = THIS_MODULE,
2789 .open = rt6_stats_seq_open,
2790 .read = seq_read,
2791 .llseek = seq_lseek,
b6fcbdb4 2792 .release = single_release_net,
1da177e4
LT
2793};
2794#endif /* CONFIG_PROC_FS */
2795
2796#ifdef CONFIG_SYSCTL
2797
1da177e4 2798static
8d65af78 2799int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2800 void __user *buffer, size_t *lenp, loff_t *ppos)
2801{
c486da34
LAG
2802 struct net *net;
2803 int delay;
2804 if (!write)
1da177e4 2805 return -EINVAL;
c486da34
LAG
2806
2807 net = (struct net *)ctl->extra1;
2808 delay = net->ipv6.sysctl.flush_delay;
2809 proc_dointvec(ctl, write, buffer, lenp, ppos);
2810 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2811 return 0;
1da177e4
LT
2812}
2813
760f2d01 2814ctl_table ipv6_route_table_template[] = {
1ab1457c 2815 {
1da177e4 2816 .procname = "flush",
4990509f 2817 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2818 .maxlen = sizeof(int),
89c8b3a1 2819 .mode = 0200,
6d9f239a 2820 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2821 },
2822 {
1da177e4 2823 .procname = "gc_thresh",
9a7ec3a9 2824 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2825 .maxlen = sizeof(int),
2826 .mode = 0644,
6d9f239a 2827 .proc_handler = proc_dointvec,
1da177e4
LT
2828 },
2829 {
1da177e4 2830 .procname = "max_size",
4990509f 2831 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2832 .maxlen = sizeof(int),
2833 .mode = 0644,
6d9f239a 2834 .proc_handler = proc_dointvec,
1da177e4
LT
2835 },
2836 {
1da177e4 2837 .procname = "gc_min_interval",
4990509f 2838 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2839 .maxlen = sizeof(int),
2840 .mode = 0644,
6d9f239a 2841 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2842 },
2843 {
1da177e4 2844 .procname = "gc_timeout",
4990509f 2845 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2846 .maxlen = sizeof(int),
2847 .mode = 0644,
6d9f239a 2848 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2849 },
2850 {
1da177e4 2851 .procname = "gc_interval",
4990509f 2852 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2853 .maxlen = sizeof(int),
2854 .mode = 0644,
6d9f239a 2855 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2856 },
2857 {
1da177e4 2858 .procname = "gc_elasticity",
4990509f 2859 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2860 .maxlen = sizeof(int),
2861 .mode = 0644,
f3d3f616 2862 .proc_handler = proc_dointvec,
1da177e4
LT
2863 },
2864 {
1da177e4 2865 .procname = "mtu_expires",
4990509f 2866 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2867 .maxlen = sizeof(int),
2868 .mode = 0644,
6d9f239a 2869 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2870 },
2871 {
1da177e4 2872 .procname = "min_adv_mss",
4990509f 2873 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2874 .maxlen = sizeof(int),
2875 .mode = 0644,
f3d3f616 2876 .proc_handler = proc_dointvec,
1da177e4
LT
2877 },
2878 {
1da177e4 2879 .procname = "gc_min_interval_ms",
4990509f 2880 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2881 .maxlen = sizeof(int),
2882 .mode = 0644,
6d9f239a 2883 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2884 },
f8572d8f 2885 { }
1da177e4
LT
2886};
2887
2c8c1e72 2888struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2889{
2890 struct ctl_table *table;
2891
2892 table = kmemdup(ipv6_route_table_template,
2893 sizeof(ipv6_route_table_template),
2894 GFP_KERNEL);
5ee09105
YH
2895
2896 if (table) {
2897 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2898 table[0].extra1 = net;
86393e52 2899 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2900 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2901 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2902 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2903 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2904 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2905 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2906 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2907 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2908 }
2909
760f2d01
DL
2910 return table;
2911}
1da177e4
LT
2912#endif
2913
2c8c1e72 2914static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2915{
633d424b 2916 int ret = -ENOMEM;
8ed67789 2917
86393e52
AD
2918 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2919 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2920
fc66f95c
ED
2921 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2922 goto out_ip6_dst_ops;
2923
8ed67789
DL
2924 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2925 sizeof(*net->ipv6.ip6_null_entry),
2926 GFP_KERNEL);
2927 if (!net->ipv6.ip6_null_entry)
fc66f95c 2928 goto out_ip6_dst_entries;
d8d1f30b 2929 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2930 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2931 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2932 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2933 ip6_template_metrics, true);
8ed67789
DL
2934
2935#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2936 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2937 sizeof(*net->ipv6.ip6_prohibit_entry),
2938 GFP_KERNEL);
68fffc67
PZ
2939 if (!net->ipv6.ip6_prohibit_entry)
2940 goto out_ip6_null_entry;
d8d1f30b 2941 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2942 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2943 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2944 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2945 ip6_template_metrics, true);
8ed67789
DL
2946
2947 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2948 sizeof(*net->ipv6.ip6_blk_hole_entry),
2949 GFP_KERNEL);
68fffc67
PZ
2950 if (!net->ipv6.ip6_blk_hole_entry)
2951 goto out_ip6_prohibit_entry;
d8d1f30b 2952 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2953 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2954 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2955 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2956 ip6_template_metrics, true);
8ed67789
DL
2957#endif
2958
b339a47c
PZ
2959 net->ipv6.sysctl.flush_delay = 0;
2960 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2961 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2962 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2963 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2964 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2965 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2966 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2967
cdb18761
DL
2968#ifdef CONFIG_PROC_FS
2969 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2970 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2971#endif
6891a346
BT
2972 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2973
8ed67789
DL
2974 ret = 0;
2975out:
2976 return ret;
f2fc6a54 2977
68fffc67
PZ
2978#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2979out_ip6_prohibit_entry:
2980 kfree(net->ipv6.ip6_prohibit_entry);
2981out_ip6_null_entry:
2982 kfree(net->ipv6.ip6_null_entry);
2983#endif
fc66f95c
ED
2984out_ip6_dst_entries:
2985 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2986out_ip6_dst_ops:
f2fc6a54 2987 goto out;
cdb18761
DL
2988}
2989
2c8c1e72 2990static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2991{
2992#ifdef CONFIG_PROC_FS
2993 proc_net_remove(net, "ipv6_route");
2994 proc_net_remove(net, "rt6_stats");
2995#endif
8ed67789
DL
2996 kfree(net->ipv6.ip6_null_entry);
2997#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2998 kfree(net->ipv6.ip6_prohibit_entry);
2999 kfree(net->ipv6.ip6_blk_hole_entry);
3000#endif
41bb78b4 3001 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
3002}
3003
3004static struct pernet_operations ip6_route_net_ops = {
3005 .init = ip6_route_net_init,
3006 .exit = ip6_route_net_exit,
3007};
3008
c3426b47
DM
3009static int __net_init ipv6_inetpeer_init(struct net *net)
3010{
3011 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3012
3013 if (!bp)
3014 return -ENOMEM;
3015 inet_peer_base_init(bp);
3016 net->ipv6.peers = bp;
3017 return 0;
3018}
3019
3020static void __net_exit ipv6_inetpeer_exit(struct net *net)
3021{
3022 struct inet_peer_base *bp = net->ipv6.peers;
3023
3024 net->ipv6.peers = NULL;
56a6b248 3025 inetpeer_invalidate_tree(bp);
c3426b47
DM
3026 kfree(bp);
3027}
3028
2b823f72 3029static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3030 .init = ipv6_inetpeer_init,
3031 .exit = ipv6_inetpeer_exit,
3032};
3033
8ed67789
DL
3034static struct notifier_block ip6_route_dev_notifier = {
3035 .notifier_call = ip6_route_dev_notify,
3036 .priority = 0,
3037};
3038
433d49c3 3039int __init ip6_route_init(void)
1da177e4 3040{
433d49c3
DL
3041 int ret;
3042
9a7ec3a9
DL
3043 ret = -ENOMEM;
3044 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3045 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3046 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3047 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3048 goto out;
14e50e57 3049
fc66f95c 3050 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3051 if (ret)
bdb3289f 3052 goto out_kmem_cache;
bdb3289f 3053
fc66f95c
ED
3054 ret = register_pernet_subsys(&ip6_route_net_ops);
3055 if (ret)
3056 goto out_dst_entries;
3057
c3426b47
DM
3058 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3059 if (ret)
3060 goto out_register_subsys;
3061
5dc121e9
AE
3062 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3063
8ed67789
DL
3064 /* Registering of the loopback is done before this portion of code,
3065 * the loopback reference in rt6_info will not be taken, do it
3066 * manually for init_net */
d8d1f30b 3067 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3068 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3069 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3070 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3071 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3072 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3073 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3074 #endif
433d49c3
DL
3075 ret = fib6_init();
3076 if (ret)
c3426b47 3077 goto out_register_inetpeer;
433d49c3 3078
433d49c3
DL
3079 ret = xfrm6_init();
3080 if (ret)
cdb18761 3081 goto out_fib6_init;
c35b7e72 3082
433d49c3
DL
3083 ret = fib6_rules_init();
3084 if (ret)
3085 goto xfrm6_init;
7e5449c2 3086
433d49c3 3087 ret = -ENOBUFS;
c7ac8679
GR
3088 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3089 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3090 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
433d49c3 3091 goto fib6_rules_init;
c127ea2c 3092
8ed67789 3093 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
3094 if (ret)
3095 goto fib6_rules_init;
8ed67789 3096
433d49c3
DL
3097out:
3098 return ret;
3099
3100fib6_rules_init:
433d49c3
DL
3101 fib6_rules_cleanup();
3102xfrm6_init:
433d49c3 3103 xfrm6_fini();
433d49c3 3104out_fib6_init:
433d49c3 3105 fib6_gc_cleanup();
c3426b47
DM
3106out_register_inetpeer:
3107 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789
DL
3108out_register_subsys:
3109 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
3110out_dst_entries:
3111 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3112out_kmem_cache:
f2fc6a54 3113 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3114 goto out;
1da177e4
LT
3115}
3116
3117void ip6_route_cleanup(void)
3118{
8ed67789 3119 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 3120 fib6_rules_cleanup();
1da177e4 3121 xfrm6_fini();
1da177e4 3122 fib6_gc_cleanup();
c3426b47 3123 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3124 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3125 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3126 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3127}