]> git.ipfire.org Git - thirdparty/kernel/stable.git/blame - net/ipv6/route.c
ipv4: 16 slots in initial fib_info hash table
[thirdparty/kernel/stable.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
8d71740c 58#include <net/netevent.h>
21713ebc 59#include <net/netlink.h>
1da177e4
LT
60
61#include <asm/uaccess.h>
62
63#ifdef CONFIG_SYSCTL
64#include <linux/sysctl.h>
65#endif
66
1716a961 67static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 68 const struct in6_addr *dest);
1da177e4 69static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 70static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 71static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
72static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73static void ip6_dst_destroy(struct dst_entry *);
74static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
569d3645 76static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
77
78static int ip6_pkt_discard(struct sk_buff *skb);
79static int ip6_pkt_discard_out(struct sk_buff *skb);
80static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
81static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
82 struct sk_buff *skb, u32 mtu);
83static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
84 struct sk_buff *skb);
1da177e4 85
70ceb4f5 86#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 87static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
88 const struct in6_addr *prefix, int prefixlen,
89 const struct in6_addr *gwaddr, int ifindex,
95c96174 90 unsigned int pref);
efa2cea0 91static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
92 const struct in6_addr *prefix, int prefixlen,
93 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
94#endif
95
06582540
DM
96static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
97{
98 struct rt6_info *rt = (struct rt6_info *) dst;
99 struct inet_peer *peer;
100 u32 *p = NULL;
101
8e2ec639
YZ
102 if (!(rt->dst.flags & DST_HOST))
103 return NULL;
104
fbfe95a4 105 peer = rt6_get_peer_create(rt);
06582540
DM
106 if (peer) {
107 u32 *old_p = __DST_METRICS_PTR(old);
108 unsigned long prev, new;
109
110 p = peer->metrics;
111 if (inet_metrics_new(peer))
112 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
113
114 new = (unsigned long) p;
115 prev = cmpxchg(&dst->_metrics, old, new);
116
117 if (prev != old) {
118 p = __DST_METRICS_PTR(prev);
119 if (prev & DST_METRICS_READ_ONLY)
120 p = NULL;
121 }
122 }
123 return p;
124}
125
f894cbf8
DM
126static inline const void *choose_neigh_daddr(struct rt6_info *rt,
127 struct sk_buff *skb,
128 const void *daddr)
39232973
DM
129{
130 struct in6_addr *p = &rt->rt6i_gateway;
131
a7563f34 132 if (!ipv6_addr_any(p))
39232973 133 return (const void *) p;
f894cbf8
DM
134 else if (skb)
135 return &ipv6_hdr(skb)->daddr;
39232973
DM
136 return daddr;
137}
138
f894cbf8
DM
139static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
140 struct sk_buff *skb,
141 const void *daddr)
d3aaeb38 142{
39232973
DM
143 struct rt6_info *rt = (struct rt6_info *) dst;
144 struct neighbour *n;
145
f894cbf8 146 daddr = choose_neigh_daddr(rt, skb, daddr);
39232973 147 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
f83c7790
DM
148 if (n)
149 return n;
150 return neigh_create(&nd_tbl, daddr, dst->dev);
151}
152
8ade06c6 153static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
f83c7790 154{
8ade06c6
DM
155 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
156 if (!n) {
157 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
158 if (IS_ERR(n))
159 return PTR_ERR(n);
160 }
97cac082 161 rt->n = n;
f83c7790
DM
162
163 return 0;
d3aaeb38
DM
164}
165
9a7ec3a9 166static struct dst_ops ip6_dst_ops_template = {
1da177e4 167 .family = AF_INET6,
09640e63 168 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
169 .gc = ip6_dst_gc,
170 .gc_thresh = 1024,
171 .check = ip6_dst_check,
0dbaee3b 172 .default_advmss = ip6_default_advmss,
ebb762f2 173 .mtu = ip6_mtu,
06582540 174 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
175 .destroy = ip6_dst_destroy,
176 .ifdown = ip6_dst_ifdown,
177 .negative_advice = ip6_negative_advice,
178 .link_failure = ip6_link_failure,
179 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 180 .redirect = rt6_do_redirect,
1ac06e03 181 .local_out = __ip6_local_out,
d3aaeb38 182 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
183};
184
ebb762f2 185static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 186{
618f9bc7
SK
187 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
188
189 return mtu ? : dst->dev->mtu;
ec831ea7
RD
190}
191
6700c270
DM
192static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
193 struct sk_buff *skb, u32 mtu)
14e50e57
DM
194{
195}
196
/* dst_ops->redirect callback for blackhole routes: deliberately a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
}
201
0972ddb2
HB
202static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
203 unsigned long old)
204{
205 return NULL;
206}
207
14e50e57
DM
208static struct dst_ops ip6_dst_blackhole_ops = {
209 .family = AF_INET6,
09640e63 210 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
211 .destroy = ip6_dst_destroy,
212 .check = ip6_dst_check,
ebb762f2 213 .mtu = ip6_blackhole_mtu,
214f45c9 214 .default_advmss = ip6_default_advmss,
14e50e57 215 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 216 .redirect = ip6_rt_blackhole_redirect,
0972ddb2 217 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 218 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
219};
220
62fa8a84
DM
221static const u32 ip6_template_metrics[RTAX_MAX] = {
222 [RTAX_HOPLIMIT - 1] = 255,
223};
224
fb0af4c7 225static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
226 .dst = {
227 .__refcnt = ATOMIC_INIT(1),
228 .__use = 1,
2c20cbd7 229 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 230 .error = -ENETUNREACH,
d8d1f30b
CG
231 .input = ip6_pkt_discard,
232 .output = ip6_pkt_discard_out,
1da177e4
LT
233 },
234 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 235 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
236 .rt6i_metric = ~(u32) 0,
237 .rt6i_ref = ATOMIC_INIT(1),
238};
239
101367c2
TG
240#ifdef CONFIG_IPV6_MULTIPLE_TABLES
241
6723ab54
DM
242static int ip6_pkt_prohibit(struct sk_buff *skb);
243static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 244
fb0af4c7 245static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
246 .dst = {
247 .__refcnt = ATOMIC_INIT(1),
248 .__use = 1,
2c20cbd7 249 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 250 .error = -EACCES,
d8d1f30b
CG
251 .input = ip6_pkt_prohibit,
252 .output = ip6_pkt_prohibit_out,
101367c2
TG
253 },
254 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 255 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
256 .rt6i_metric = ~(u32) 0,
257 .rt6i_ref = ATOMIC_INIT(1),
258};
259
fb0af4c7 260static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
261 .dst = {
262 .__refcnt = ATOMIC_INIT(1),
263 .__use = 1,
2c20cbd7 264 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 265 .error = -EINVAL,
d8d1f30b
CG
266 .input = dst_discard,
267 .output = dst_discard,
101367c2
TG
268 },
269 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 270 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
271 .rt6i_metric = ~(u32) 0,
272 .rt6i_ref = ATOMIC_INIT(1),
273};
274
275#endif
276
1da177e4 277/* allocate dst with ip6_dst_ops */
97bab73f 278static inline struct rt6_info *ip6_dst_alloc(struct net *net,
957c665f 279 struct net_device *dev,
8b96d22d
DM
280 int flags,
281 struct fib6_table *table)
1da177e4 282{
97bab73f 283 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
6f3118b5 284 0, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 285
97bab73f 286 if (rt) {
8104891b
SK
287 struct dst_entry *dst = &rt->dst;
288
289 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
8b96d22d 290 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
6f3118b5 291 rt->rt6i_genid = rt_genid(net);
97bab73f 292 }
cf911662 293 return rt;
1da177e4
LT
294}
295
296static void ip6_dst_destroy(struct dst_entry *dst)
297{
298 struct rt6_info *rt = (struct rt6_info *)dst;
299 struct inet6_dev *idev = rt->rt6i_idev;
300
97cac082
DM
301 if (rt->n)
302 neigh_release(rt->n);
303
8e2ec639
YZ
304 if (!(rt->dst.flags & DST_HOST))
305 dst_destroy_metrics_generic(dst);
306
38308473 307 if (idev) {
1da177e4
LT
308 rt->rt6i_idev = NULL;
309 in6_dev_put(idev);
1ab1457c 310 }
1716a961
G
311
312 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
313 dst_release(dst->from);
314
97bab73f
DM
315 if (rt6_has_peer(rt)) {
316 struct inet_peer *peer = rt6_peer_ptr(rt);
b3419363
DM
317 inet_putpeer(peer);
318 }
319}
320
6431cbc2
DM
321static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
322
323static u32 rt6_peer_genid(void)
324{
325 return atomic_read(&__rt6_peer_genid);
326}
327
b3419363
DM
328void rt6_bind_peer(struct rt6_info *rt, int create)
329{
97bab73f 330 struct inet_peer_base *base;
b3419363
DM
331 struct inet_peer *peer;
332
97bab73f
DM
333 base = inetpeer_base_ptr(rt->_rt6i_peer);
334 if (!base)
335 return;
336
337 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
7b34ca2a
DM
338 if (peer) {
339 if (!rt6_set_peer(rt, peer))
340 inet_putpeer(peer);
341 else
342 rt->rt6i_peer_genid = rt6_peer_genid();
343 }
1da177e4
LT
344}
345
346static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
347 int how)
348{
349 struct rt6_info *rt = (struct rt6_info *)dst;
350 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 351 struct net_device *loopback_dev =
c346dca1 352 dev_net(dev)->loopback_dev;
1da177e4 353
97cac082
DM
354 if (dev != loopback_dev) {
355 if (idev && idev->dev == dev) {
356 struct inet6_dev *loopback_idev =
357 in6_dev_get(loopback_dev);
358 if (loopback_idev) {
359 rt->rt6i_idev = loopback_idev;
360 in6_dev_put(idev);
361 }
362 }
363 if (rt->n && rt->n->dev == dev) {
364 rt->n->dev = loopback_dev;
365 dev_hold(loopback_dev);
366 dev_put(dev);
1da177e4
LT
367 }
368 }
369}
370
a50feda5 371static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 372{
1716a961
G
373 if (rt->rt6i_flags & RTF_EXPIRES) {
374 if (time_after(jiffies, rt->dst.expires))
a50feda5 375 return true;
1716a961 376 } else if (rt->dst.from) {
3fd91fb3 377 return rt6_check_expired((struct rt6_info *) rt->dst.from);
1716a961 378 }
a50feda5 379 return false;
1da177e4
LT
380}
381
a50feda5 382static bool rt6_need_strict(const struct in6_addr *daddr)
c71099ac 383{
a02cec21
ED
384 return ipv6_addr_type(daddr) &
385 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
386}
387
1da177e4 388/*
c71099ac 389 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
390 */
391
8ed67789
DL
392static inline struct rt6_info *rt6_device_match(struct net *net,
393 struct rt6_info *rt,
b71d1d42 394 const struct in6_addr *saddr,
1da177e4 395 int oif,
d420895e 396 int flags)
1da177e4
LT
397{
398 struct rt6_info *local = NULL;
399 struct rt6_info *sprt;
400
dd3abc4e
YH
401 if (!oif && ipv6_addr_any(saddr))
402 goto out;
403
d8d1f30b 404 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 405 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
406
407 if (oif) {
1da177e4
LT
408 if (dev->ifindex == oif)
409 return sprt;
410 if (dev->flags & IFF_LOOPBACK) {
38308473 411 if (!sprt->rt6i_idev ||
1da177e4 412 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 413 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 414 continue;
1ab1457c 415 if (local && (!oif ||
1da177e4
LT
416 local->rt6i_idev->dev->ifindex == oif))
417 continue;
418 }
419 local = sprt;
420 }
dd3abc4e
YH
421 } else {
422 if (ipv6_chk_addr(net, saddr, dev,
423 flags & RT6_LOOKUP_F_IFACE))
424 return sprt;
1da177e4 425 }
dd3abc4e 426 }
1da177e4 427
dd3abc4e 428 if (oif) {
1da177e4
LT
429 if (local)
430 return local;
431
d420895e 432 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 433 return net->ipv6.ip6_null_entry;
1da177e4 434 }
dd3abc4e 435out:
1da177e4
LT
436 return rt;
437}
438
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the neighbour bound to @rt is not in a
 * NUD_VALID state, send a Neighbour Solicitation towards it.
 *
 * Router Reachability Probe MUST be rate-limited; here at most one probe
 * is sent per rtr_probe_interval of the route's inet6_dev, tracked via
 * neigh->updated.
 *
 * neigh->updated is written here, so the neighbour lock is taken for
 * writing.  Holding only the read side would let several CPUs pass the
 * time check concurrently and each emit a probe, defeating the rate limit.
 *
 * @rt may be NULL, in which case nothing happens.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	neigh = rt ? rt->n : NULL;
	/* Unlocked fast path; re-checked under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated +
				 rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* Solicit the neighbour via its solicited-node multicast address. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
}
#else
/* Without router preference support, probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
475
1da177e4 476/*
554cfb7e 477 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 478 */
b6f99a21 479static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 480{
d1918542 481 struct net_device *dev = rt->dst.dev;
161980f4 482 if (!oif || dev->ifindex == oif)
554cfb7e 483 return 2;
161980f4
DM
484 if ((dev->flags & IFF_LOOPBACK) &&
485 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
486 return 1;
487 return 0;
554cfb7e 488}
1da177e4 489
b6f99a21 490static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 491{
f2c31e32 492 struct neighbour *neigh;
398bcbeb 493 int m;
f2c31e32 494
97cac082 495 neigh = rt->n;
4d0c5911
YH
496 if (rt->rt6i_flags & RTF_NONEXTHOP ||
497 !(rt->rt6i_flags & RTF_GATEWAY))
498 m = 1;
499 else if (neigh) {
554cfb7e
YH
500 read_lock_bh(&neigh->lock);
501 if (neigh->nud_state & NUD_VALID)
4d0c5911 502 m = 2;
398bcbeb
YH
503#ifdef CONFIG_IPV6_ROUTER_PREF
504 else if (neigh->nud_state & NUD_FAILED)
505 m = 0;
506#endif
507 else
ea73ee23 508 m = 1;
554cfb7e 509 read_unlock_bh(&neigh->lock);
398bcbeb
YH
510 } else
511 m = 0;
554cfb7e 512 return m;
1da177e4
LT
513}
514
554cfb7e
YH
515static int rt6_score_route(struct rt6_info *rt, int oif,
516 int strict)
1da177e4 517{
4d0c5911 518 int m, n;
1ab1457c 519
4d0c5911 520 m = rt6_check_dev(rt, oif);
77d16f45 521 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 522 return -1;
ebacaaa0
YH
523#ifdef CONFIG_IPV6_ROUTER_PREF
524 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
525#endif
4d0c5911 526 n = rt6_check_neigh(rt);
557e92ef 527 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
528 return -1;
529 return m;
530}
531
f11e6659
DM
532static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
533 int *mpri, struct rt6_info *match)
554cfb7e 534{
f11e6659
DM
535 int m;
536
537 if (rt6_check_expired(rt))
538 goto out;
539
540 m = rt6_score_route(rt, oif, strict);
541 if (m < 0)
542 goto out;
543
544 if (m > *mpri) {
545 if (strict & RT6_LOOKUP_F_REACHABLE)
546 rt6_probe(match);
547 *mpri = m;
548 match = rt;
549 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
550 rt6_probe(rt);
551 }
552
553out:
554 return match;
555}
556
557static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
558 struct rt6_info *rr_head,
559 u32 metric, int oif, int strict)
560{
561 struct rt6_info *rt, *match;
554cfb7e 562 int mpri = -1;
1da177e4 563
f11e6659
DM
564 match = NULL;
565 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 566 rt = rt->dst.rt6_next)
f11e6659
DM
567 match = find_match(rt, oif, strict, &mpri, match);
568 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 569 rt = rt->dst.rt6_next)
f11e6659 570 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 571
f11e6659
DM
572 return match;
573}
1da177e4 574
f11e6659
DM
575static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
576{
577 struct rt6_info *match, *rt0;
8ed67789 578 struct net *net;
1da177e4 579
f11e6659
DM
580 rt0 = fn->rr_ptr;
581 if (!rt0)
582 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 583
f11e6659 584 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 585
554cfb7e 586 if (!match &&
f11e6659 587 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 588 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 589
554cfb7e 590 /* no entries matched; do round-robin */
f11e6659
DM
591 if (!next || next->rt6i_metric != rt0->rt6i_metric)
592 next = fn->leaf;
593
594 if (next != rt0)
595 fn->rr_ptr = next;
1da177e4 596 }
1da177e4 597
d1918542 598 net = dev_net(rt0->dst.dev);
a02cec21 599 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
600}
601
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option received on @dev from router @gwaddr.
 *
 * @opt points at the option and @len is the number of bytes available.
 * A zero lifetime deletes an existing matching route; a non-zero lifetime
 * creates the route if missing or refreshes its preference, and then sets
 * or clears its expiry.
 *
 * Returns 0 on success and -EINVAL if the option is too short, its
 * length/prefix_len fields are inconsistent, or the preference is invalid.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *)opt;
	struct in6_addr masked_prefix;
	struct in6_addr *prefix;
	unsigned long lifetime;
	unsigned int pref;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/*
	 * Sanity check for prefix_len and length: longer prefixes need a
	 * larger option (presumably counted in 8-octet units - TODO confirm).
	 */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* Full-size option: use the prefix bytes in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&masked_prefix,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &masked_prefix;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* Lifetime of zero withdraws the route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) |
				 RTF_PREF(pref);

	if (rt) {
		if (addrconf_finite_timeout(lifetime))
			rt6_set_expires(rt, jiffies + HZ * lifetime);
		else
			rt6_clean_expires(rt);

		dst_release(&rt->dst);
	}
	return 0;
}
#endif
674
/*
 * BACKTRACK - climb the fib tree after a lookup ended on the null entry.
 *
 * Not hygienic: it reads the caller's local variables `rt` and `fn`, and
 * jumps to the caller's labels `out` (reached the top-level root) and
 * `restart` (found a node carrying route info, retry selection there).
 * When the parent has a source subtree other than the current node, the
 * walk continues with a lookup of @saddr inside that subtree.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 692
8ed67789
DL
693static struct rt6_info *ip6_pol_route_lookup(struct net *net,
694 struct fib6_table *table,
4c9483b2 695 struct flowi6 *fl6, int flags)
1da177e4
LT
696{
697 struct fib6_node *fn;
698 struct rt6_info *rt;
699
c71099ac 700 read_lock_bh(&table->tb6_lock);
4c9483b2 701 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
702restart:
703 rt = fn->leaf;
4c9483b2
DM
704 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
705 BACKTRACK(net, &fl6->saddr);
c71099ac 706out:
d8d1f30b 707 dst_use(&rt->dst, jiffies);
c71099ac 708 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
709 return rt;
710
711}
712
ea6e574e
FW
713struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
714 int flags)
715{
716 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
717}
718EXPORT_SYMBOL_GPL(ip6_route_lookup);
719
9acd9f3a
YH
720struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
721 const struct in6_addr *saddr, int oif, int strict)
c71099ac 722{
4c9483b2
DM
723 struct flowi6 fl6 = {
724 .flowi6_oif = oif,
725 .daddr = *daddr,
c71099ac
TG
726 };
727 struct dst_entry *dst;
77d16f45 728 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 729
adaa70bb 730 if (saddr) {
4c9483b2 731 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
732 flags |= RT6_LOOKUP_F_HAS_SADDR;
733 }
734
4c9483b2 735 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
736 if (dst->error == 0)
737 return (struct rt6_info *) dst;
738
739 dst_release(dst);
740
1da177e4
LT
741 return NULL;
742}
743
7159039a
YH
744EXPORT_SYMBOL(rt6_lookup);
745
c71099ac 746/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
747 It takes new route entry, the addition fails by any reason the
748 route is freed. In any case, if caller does not hold it, it may
749 be destroyed.
750 */
751
86872cb5 752static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
753{
754 int err;
c71099ac 755 struct fib6_table *table;
1da177e4 756
c71099ac
TG
757 table = rt->rt6i_table;
758 write_lock_bh(&table->tb6_lock);
86872cb5 759 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 760 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
761
762 return err;
763}
764
40e22e8f
TG
765int ip6_ins_rt(struct rt6_info *rt)
766{
4d1169c1 767 struct nl_info info = {
d1918542 768 .nl_net = dev_net(rt->dst.dev),
4d1169c1 769 };
528c4ceb 770 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
771}
772
1716a961 773static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
21efcfa0 774 const struct in6_addr *daddr,
b71d1d42 775 const struct in6_addr *saddr)
1da177e4 776{
1da177e4
LT
777 struct rt6_info *rt;
778
779 /*
780 * Clone the route.
781 */
782
21efcfa0 783 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
784
785 if (rt) {
14deae41
DM
786 int attempts = !in_softirq();
787
38308473 788 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 789 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 790 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 791 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 792 rt->rt6i_gateway = *daddr;
58c4fb86 793 }
1da177e4 794
1da177e4 795 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
796
797#ifdef CONFIG_IPV6_SUBTREES
798 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 799 rt->rt6i_src.addr = *saddr;
1da177e4
LT
800 rt->rt6i_src.plen = 128;
801 }
802#endif
803
14deae41 804 retry:
8ade06c6 805 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
d1918542 806 struct net *net = dev_net(rt->dst.dev);
14deae41
DM
807 int saved_rt_min_interval =
808 net->ipv6.sysctl.ip6_rt_gc_min_interval;
809 int saved_rt_elasticity =
810 net->ipv6.sysctl.ip6_rt_gc_elasticity;
811
812 if (attempts-- > 0) {
813 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
814 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
815
86393e52 816 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
817
818 net->ipv6.sysctl.ip6_rt_gc_elasticity =
819 saved_rt_elasticity;
820 net->ipv6.sysctl.ip6_rt_gc_min_interval =
821 saved_rt_min_interval;
822 goto retry;
823 }
824
f3213831 825 net_warn_ratelimited("Neighbour table overflow\n");
d8d1f30b 826 dst_free(&rt->dst);
14deae41
DM
827 return NULL;
828 }
95a9a5ba 829 }
1da177e4 830
95a9a5ba
YH
831 return rt;
832}
1da177e4 833
21efcfa0
ED
834static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
835 const struct in6_addr *daddr)
299d9939 836{
21efcfa0
ED
837 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
838
299d9939 839 if (rt) {
299d9939 840 rt->rt6i_flags |= RTF_CACHE;
97cac082 841 rt->n = neigh_clone(ort->n);
299d9939
YH
842 }
843 return rt;
844}
845
8ed67789 846static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 847 struct flowi6 *fl6, int flags)
1da177e4
LT
848{
849 struct fib6_node *fn;
519fbd87 850 struct rt6_info *rt, *nrt;
c71099ac 851 int strict = 0;
1da177e4 852 int attempts = 3;
519fbd87 853 int err;
53b7997f 854 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 855
77d16f45 856 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
857
858relookup:
c71099ac 859 read_lock_bh(&table->tb6_lock);
1da177e4 860
8238dd06 861restart_2:
4c9483b2 862 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
863
864restart:
4acad72d 865 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 866
4c9483b2 867 BACKTRACK(net, &fl6->saddr);
8ed67789 868 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 869 rt->rt6i_flags & RTF_CACHE)
1ddef044 870 goto out;
1da177e4 871
d8d1f30b 872 dst_hold(&rt->dst);
c71099ac 873 read_unlock_bh(&table->tb6_lock);
fb9de91e 874
97cac082 875 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 876 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 877 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 878 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
879 else
880 goto out2;
e40cf353 881
d8d1f30b 882 dst_release(&rt->dst);
8ed67789 883 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 884
d8d1f30b 885 dst_hold(&rt->dst);
519fbd87 886 if (nrt) {
40e22e8f 887 err = ip6_ins_rt(nrt);
519fbd87 888 if (!err)
1da177e4 889 goto out2;
1da177e4 890 }
1da177e4 891
519fbd87
YH
892 if (--attempts <= 0)
893 goto out2;
894
895 /*
c71099ac 896 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
897 * released someone could insert this route. Relookup.
898 */
d8d1f30b 899 dst_release(&rt->dst);
519fbd87
YH
900 goto relookup;
901
902out:
8238dd06
YH
903 if (reachable) {
904 reachable = 0;
905 goto restart_2;
906 }
d8d1f30b 907 dst_hold(&rt->dst);
c71099ac 908 read_unlock_bh(&table->tb6_lock);
1da177e4 909out2:
d8d1f30b
CG
910 rt->dst.lastuse = jiffies;
911 rt->dst.__use++;
c71099ac
TG
912
913 return rt;
1da177e4
LT
914}
915
8ed67789 916static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 917 struct flowi6 *fl6, int flags)
4acad72d 918{
4c9483b2 919 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
920}
921
72331bc0
SL
922static struct dst_entry *ip6_route_input_lookup(struct net *net,
923 struct net_device *dev,
924 struct flowi6 *fl6, int flags)
925{
926 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
927 flags |= RT6_LOOKUP_F_IFACE;
928
929 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
930}
931
c71099ac
TG
932void ip6_route_input(struct sk_buff *skb)
933{
b71d1d42 934 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 935 struct net *net = dev_net(skb->dev);
adaa70bb 936 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
937 struct flowi6 fl6 = {
938 .flowi6_iif = skb->dev->ifindex,
939 .daddr = iph->daddr,
940 .saddr = iph->saddr,
38308473 941 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
942 .flowi6_mark = skb->mark,
943 .flowi6_proto = iph->nexthdr,
c71099ac 944 };
adaa70bb 945
72331bc0 946 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
947}
948
8ed67789 949static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 950 struct flowi6 *fl6, int flags)
1da177e4 951{
4c9483b2 952 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
953}
954
9c7a4f9c 955struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 956 struct flowi6 *fl6)
c71099ac
TG
957{
958 int flags = 0;
959
1fb9489b 960 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 961
4c9483b2 962 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 963 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 964
4c9483b2 965 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 966 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
967 else if (sk)
968 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 969
4c9483b2 970 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
971}
972
7159039a 973EXPORT_SYMBOL(ip6_route_output);
1da177e4 974
2774c131 975struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 976{
5c1e6aa3 977 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
978 struct dst_entry *new = NULL;
979
f5b0a874 980 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
14e50e57 981 if (rt) {
d8d1f30b 982 new = &rt->dst;
14e50e57 983
8104891b
SK
984 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
985 rt6_init_peer(rt, net->ipv6.peers);
986
14e50e57 987 new->__use = 1;
352e512c
HX
988 new->input = dst_discard;
989 new->output = dst_discard;
14e50e57 990
21efcfa0
ED
991 if (dst_metrics_read_only(&ort->dst))
992 new->_metrics = ort->dst._metrics;
993 else
994 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
995 rt->rt6i_idev = ort->rt6i_idev;
996 if (rt->rt6i_idev)
997 in6_dev_hold(rt->rt6i_idev);
14e50e57 998
4e3fd7a0 999 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1000 rt->rt6i_flags = ort->rt6i_flags;
1001 rt6_clean_expires(rt);
14e50e57
DM
1002 rt->rt6i_metric = 0;
1003
1004 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1005#ifdef CONFIG_IPV6_SUBTREES
1006 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1007#endif
1008
1009 dst_free(new);
1010 }
1011
69ead7af
DM
1012 dst_release(dst_orig);
1013 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1014}
14e50e57 1015
1da177e4
LT
1016/*
1017 * Destination cache support functions
1018 */
1019
1020static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1021{
1022 struct rt6_info *rt;
1023
1024 rt = (struct rt6_info *) dst;
1025
6f3118b5
ND
1026 /* All IPV6 dsts are created with ->obsolete set to the value
1027 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1028 * into this function always.
1029 */
1030 if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1031 return NULL;
1032
6431cbc2
DM
1033 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1034 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
97bab73f 1035 if (!rt6_has_peer(rt))
6431cbc2
DM
1036 rt6_bind_peer(rt, 0);
1037 rt->rt6i_peer_genid = rt6_peer_genid();
1038 }
1da177e4 1039 return dst;
6431cbc2 1040 }
1da177e4
LT
1041 return NULL;
1042}
1043
1044static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1045{
1046 struct rt6_info *rt = (struct rt6_info *) dst;
1047
1048 if (rt) {
54c1a859
YH
1049 if (rt->rt6i_flags & RTF_CACHE) {
1050 if (rt6_check_expired(rt)) {
1051 ip6_del_rt(rt);
1052 dst = NULL;
1053 }
1054 } else {
1da177e4 1055 dst_release(dst);
54c1a859
YH
1056 dst = NULL;
1057 }
1da177e4 1058 }
54c1a859 1059 return dst;
1da177e4
LT
1060}
1061
1062static void ip6_link_failure(struct sk_buff *skb)
1063{
1064 struct rt6_info *rt;
1065
3ffe533c 1066 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1067
adf30907 1068 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1069 if (rt) {
1716a961
G
1070 if (rt->rt6i_flags & RTF_CACHE)
1071 rt6_update_expires(rt, 0);
1072 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1da177e4
LT
1073 rt->rt6i_node->fn_sernum = -1;
1074 }
1075}
1076
6700c270
DM
1077static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1078 struct sk_buff *skb, u32 mtu)
1da177e4
LT
1079{
1080 struct rt6_info *rt6 = (struct rt6_info*)dst;
1081
81aded24 1082 dst_confirm(dst);
1da177e4 1083 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
81aded24
DM
1084 struct net *net = dev_net(dst->dev);
1085
1da177e4
LT
1086 rt6->rt6i_flags |= RTF_MODIFIED;
1087 if (mtu < IPV6_MIN_MTU) {
defb3519 1088 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1089 mtu = IPV6_MIN_MTU;
defb3519
DM
1090 features |= RTAX_FEATURE_ALLFRAG;
1091 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1092 }
defb3519 1093 dst_metric_set(dst, RTAX_MTU, mtu);
81aded24 1094 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1095 }
1096}
1097
42ae66c8
DM
1098void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1099 int oif, u32 mark)
81aded24
DM
1100{
1101 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1102 struct dst_entry *dst;
1103 struct flowi6 fl6;
1104
1105 memset(&fl6, 0, sizeof(fl6));
1106 fl6.flowi6_oif = oif;
1107 fl6.flowi6_mark = mark;
3e12939a 1108 fl6.flowi6_flags = 0;
81aded24
DM
1109 fl6.daddr = iph->daddr;
1110 fl6.saddr = iph->saddr;
1111 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1112
1113 dst = ip6_route_output(net, NULL, &fl6);
1114 if (!dst->error)
6700c270 1115 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
81aded24
DM
1116 dst_release(dst);
1117}
1118EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1119
1120void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1121{
1122 ip6_update_pmtu(skb, sock_net(sk), mtu,
1123 sk->sk_bound_dev_if, sk->sk_mark);
1124}
1125EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1126
3a5ad2ee
DM
1127void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1128{
1129 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1130 struct dst_entry *dst;
1131 struct flowi6 fl6;
1132
1133 memset(&fl6, 0, sizeof(fl6));
1134 fl6.flowi6_oif = oif;
1135 fl6.flowi6_mark = mark;
1136 fl6.flowi6_flags = 0;
1137 fl6.daddr = iph->daddr;
1138 fl6.saddr = iph->saddr;
1139 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1140
1141 dst = ip6_route_output(net, NULL, &fl6);
1142 if (!dst->error)
6700c270 1143 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1144 dst_release(dst);
1145}
1146EXPORT_SYMBOL_GPL(ip6_redirect);
1147
1148void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1149{
1150 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1151}
1152EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1153
0dbaee3b 1154static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1155{
0dbaee3b
DM
1156 struct net_device *dev = dst->dev;
1157 unsigned int mtu = dst_mtu(dst);
1158 struct net *net = dev_net(dev);
1159
1da177e4
LT
1160 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1161
5578689a
DL
1162 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1163 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1164
1165 /*
1ab1457c
YH
1166 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1167 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1168 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1169 * rely only on pmtu discovery"
1170 */
1171 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1172 mtu = IPV6_MAXPLEN;
1173 return mtu;
1174}
1175
ebb762f2 1176static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1177{
d33e4553 1178 struct inet6_dev *idev;
618f9bc7
SK
1179 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1180
1181 if (mtu)
1182 return mtu;
1183
1184 mtu = IPV6_MIN_MTU;
d33e4553
DM
1185
1186 rcu_read_lock();
1187 idev = __in6_dev_get(dst->dev);
1188 if (idev)
1189 mtu = idev->cnf.mtu6;
1190 rcu_read_unlock();
1191
1192 return mtu;
1193}
1194
3b00944c
YH
1195static struct dst_entry *icmp6_dst_gc_list;
1196static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1197
3b00944c 1198struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1199 struct neighbour *neigh,
87a11578 1200 struct flowi6 *fl6)
1da177e4 1201{
87a11578 1202 struct dst_entry *dst;
1da177e4
LT
1203 struct rt6_info *rt;
1204 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1205 struct net *net = dev_net(dev);
1da177e4 1206
38308473 1207 if (unlikely(!idev))
122bdf67 1208 return ERR_PTR(-ENODEV);
1da177e4 1209
8b96d22d 1210 rt = ip6_dst_alloc(net, dev, 0, NULL);
38308473 1211 if (unlikely(!rt)) {
1da177e4 1212 in6_dev_put(idev);
87a11578 1213 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1214 goto out;
1215 }
1216
1da177e4
LT
1217 if (neigh)
1218 neigh_hold(neigh);
14deae41 1219 else {
f894cbf8 1220 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
b43faac6 1221 if (IS_ERR(neigh)) {
252c3d84 1222 in6_dev_put(idev);
b43faac6
DM
1223 dst_free(&rt->dst);
1224 return ERR_CAST(neigh);
1225 }
14deae41 1226 }
1da177e4 1227
8e2ec639
YZ
1228 rt->dst.flags |= DST_HOST;
1229 rt->dst.output = ip6_output;
97cac082 1230 rt->n = neigh;
d8d1f30b 1231 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1232 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1233 rt->rt6i_dst.plen = 128;
1234 rt->rt6i_idev = idev;
7011687f 1235 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1236
3b00944c 1237 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1238 rt->dst.next = icmp6_dst_gc_list;
1239 icmp6_dst_gc_list = &rt->dst;
3b00944c 1240 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1241
5578689a 1242 fib6_force_start_gc(net);
1da177e4 1243
87a11578
DM
1244 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1245
1da177e4 1246out:
87a11578 1247 return dst;
1da177e4
LT
1248}
1249
3d0f24a7 1250int icmp6_dst_gc(void)
1da177e4 1251{
e9476e95 1252 struct dst_entry *dst, **pprev;
3d0f24a7 1253 int more = 0;
1da177e4 1254
3b00944c
YH
1255 spin_lock_bh(&icmp6_dst_lock);
1256 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1257
1da177e4
LT
1258 while ((dst = *pprev) != NULL) {
1259 if (!atomic_read(&dst->__refcnt)) {
1260 *pprev = dst->next;
1261 dst_free(dst);
1da177e4
LT
1262 } else {
1263 pprev = &dst->next;
3d0f24a7 1264 ++more;
1da177e4
LT
1265 }
1266 }
1267
3b00944c 1268 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1269
3d0f24a7 1270 return more;
1da177e4
LT
1271}
1272
1e493d19
DM
1273static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1274 void *arg)
1275{
1276 struct dst_entry *dst, **pprev;
1277
1278 spin_lock_bh(&icmp6_dst_lock);
1279 pprev = &icmp6_dst_gc_list;
1280 while ((dst = *pprev) != NULL) {
1281 struct rt6_info *rt = (struct rt6_info *) dst;
1282 if (func(rt, arg)) {
1283 *pprev = dst->next;
1284 dst_free(dst);
1285 } else {
1286 pprev = &dst->next;
1287 }
1288 }
1289 spin_unlock_bh(&icmp6_dst_lock);
1290}
1291
569d3645 1292static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1293{
1da177e4 1294 unsigned long now = jiffies;
86393e52 1295 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1296 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1297 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1298 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1299 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1300 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1301 int entries;
7019b78e 1302
fc66f95c 1303 entries = dst_entries_get_fast(ops);
7019b78e 1304 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1305 entries <= rt_max_size)
1da177e4
LT
1306 goto out;
1307
6891a346
BT
1308 net->ipv6.ip6_rt_gc_expire++;
1309 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1310 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1311 entries = dst_entries_get_slow(ops);
1312 if (entries < ops->gc_thresh)
7019b78e 1313 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1314out:
7019b78e 1315 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1316 return entries > rt_max_size;
1da177e4
LT
1317}
1318
1319/* Clean host part of a prefix. Not necessary in radix tree,
1320 but results in cleaner routing tables.
1321
1322 Remove it only when all the things will work!
1323 */
1324
6b75d090 1325int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1326{
5170ae82 1327 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1328 if (hoplimit == 0) {
6b75d090 1329 struct net_device *dev = dst->dev;
c68f24cc
ED
1330 struct inet6_dev *idev;
1331
1332 rcu_read_lock();
1333 idev = __in6_dev_get(dev);
1334 if (idev)
6b75d090 1335 hoplimit = idev->cnf.hop_limit;
c68f24cc 1336 else
53b7997f 1337 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1338 rcu_read_unlock();
1da177e4
LT
1339 }
1340 return hoplimit;
1341}
abbf46ae 1342EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1343
1344/*
1345 *
1346 */
1347
86872cb5 1348int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1349{
1350 int err;
5578689a 1351 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1352 struct rt6_info *rt = NULL;
1353 struct net_device *dev = NULL;
1354 struct inet6_dev *idev = NULL;
c71099ac 1355 struct fib6_table *table;
1da177e4
LT
1356 int addr_type;
1357
86872cb5 1358 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1359 return -EINVAL;
1360#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1361 if (cfg->fc_src_len)
1da177e4
LT
1362 return -EINVAL;
1363#endif
86872cb5 1364 if (cfg->fc_ifindex) {
1da177e4 1365 err = -ENODEV;
5578689a 1366 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1367 if (!dev)
1368 goto out;
1369 idev = in6_dev_get(dev);
1370 if (!idev)
1371 goto out;
1372 }
1373
86872cb5
TG
1374 if (cfg->fc_metric == 0)
1375 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1376
d71314b4 1377 err = -ENOBUFS;
38308473
DM
1378 if (cfg->fc_nlinfo.nlh &&
1379 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1380 table = fib6_get_table(net, cfg->fc_table);
38308473 1381 if (!table) {
f3213831 1382 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1383 table = fib6_new_table(net, cfg->fc_table);
1384 }
1385 } else {
1386 table = fib6_new_table(net, cfg->fc_table);
1387 }
38308473
DM
1388
1389 if (!table)
c71099ac 1390 goto out;
c71099ac 1391
8b96d22d 1392 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1da177e4 1393
38308473 1394 if (!rt) {
1da177e4
LT
1395 err = -ENOMEM;
1396 goto out;
1397 }
1398
1716a961
G
1399 if (cfg->fc_flags & RTF_EXPIRES)
1400 rt6_set_expires(rt, jiffies +
1401 clock_t_to_jiffies(cfg->fc_expires));
1402 else
1403 rt6_clean_expires(rt);
1da177e4 1404
86872cb5
TG
1405 if (cfg->fc_protocol == RTPROT_UNSPEC)
1406 cfg->fc_protocol = RTPROT_BOOT;
1407 rt->rt6i_protocol = cfg->fc_protocol;
1408
1409 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1410
1411 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1412 rt->dst.input = ip6_mc_input;
ab79ad14
1413 else if (cfg->fc_flags & RTF_LOCAL)
1414 rt->dst.input = ip6_input;
1da177e4 1415 else
d8d1f30b 1416 rt->dst.input = ip6_forward;
1da177e4 1417
d8d1f30b 1418 rt->dst.output = ip6_output;
1da177e4 1419
86872cb5
TG
1420 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1421 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1422 if (rt->rt6i_dst.plen == 128)
11d53b49 1423 rt->dst.flags |= DST_HOST;
1da177e4 1424
8e2ec639
YZ
1425 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1426 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1427 if (!metrics) {
1428 err = -ENOMEM;
1429 goto out;
1430 }
1431 dst_init_metrics(&rt->dst, metrics, 0);
1432 }
1da177e4 1433#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1434 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1435 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1436#endif
1437
86872cb5 1438 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1439
1440 /* We cannot add true routes via loopback here,
1441 they would result in kernel looping; promote them to reject routes
1442 */
86872cb5 1443 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1444 (dev && (dev->flags & IFF_LOOPBACK) &&
1445 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1446 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1447 /* hold loopback dev/idev if we haven't done so. */
5578689a 1448 if (dev != net->loopback_dev) {
1da177e4
LT
1449 if (dev) {
1450 dev_put(dev);
1451 in6_dev_put(idev);
1452 }
5578689a 1453 dev = net->loopback_dev;
1da177e4
LT
1454 dev_hold(dev);
1455 idev = in6_dev_get(dev);
1456 if (!idev) {
1457 err = -ENODEV;
1458 goto out;
1459 }
1460 }
d8d1f30b
CG
1461 rt->dst.output = ip6_pkt_discard_out;
1462 rt->dst.input = ip6_pkt_discard;
1da177e4 1463 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
1464 switch (cfg->fc_type) {
1465 case RTN_BLACKHOLE:
1466 rt->dst.error = -EINVAL;
1467 break;
1468 case RTN_PROHIBIT:
1469 rt->dst.error = -EACCES;
1470 break;
b4949ab2
ND
1471 case RTN_THROW:
1472 rt->dst.error = -EAGAIN;
1473 break;
ef2c7d7b
ND
1474 default:
1475 rt->dst.error = -ENETUNREACH;
1476 break;
1477 }
1da177e4
LT
1478 goto install_route;
1479 }
1480
86872cb5 1481 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1482 const struct in6_addr *gw_addr;
1da177e4
LT
1483 int gwa_type;
1484
86872cb5 1485 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1486 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1487 gwa_type = ipv6_addr_type(gw_addr);
1488
1489 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1490 struct rt6_info *grt;
1491
1492 /* IPv6 strictly inhibits using not link-local
1493 addresses as nexthop address.
1494 Otherwise, router will not able to send redirects.
1495 It is very good, but in some (rare!) circumstances
1496 (SIT, PtP, NBMA NOARP links) it is handy to allow
1497 some exceptions. --ANK
1498 */
1499 err = -EINVAL;
38308473 1500 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1501 goto out;
1502
5578689a 1503 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1504
1505 err = -EHOSTUNREACH;
38308473 1506 if (!grt)
1da177e4
LT
1507 goto out;
1508 if (dev) {
d1918542 1509 if (dev != grt->dst.dev) {
d8d1f30b 1510 dst_release(&grt->dst);
1da177e4
LT
1511 goto out;
1512 }
1513 } else {
d1918542 1514 dev = grt->dst.dev;
1da177e4
LT
1515 idev = grt->rt6i_idev;
1516 dev_hold(dev);
1517 in6_dev_hold(grt->rt6i_idev);
1518 }
38308473 1519 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1520 err = 0;
d8d1f30b 1521 dst_release(&grt->dst);
1da177e4
LT
1522
1523 if (err)
1524 goto out;
1525 }
1526 err = -EINVAL;
38308473 1527 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1528 goto out;
1529 }
1530
1531 err = -ENODEV;
38308473 1532 if (!dev)
1da177e4
LT
1533 goto out;
1534
c3968a85
DW
1535 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1536 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1537 err = -EINVAL;
1538 goto out;
1539 }
4e3fd7a0 1540 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1541 rt->rt6i_prefsrc.plen = 128;
1542 } else
1543 rt->rt6i_prefsrc.plen = 0;
1544
86872cb5 1545 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
8ade06c6 1546 err = rt6_bind_neighbour(rt, dev);
f83c7790 1547 if (err)
1da177e4 1548 goto out;
1da177e4
LT
1549 }
1550
86872cb5 1551 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1552
1553install_route:
86872cb5
TG
1554 if (cfg->fc_mx) {
1555 struct nlattr *nla;
1556 int remaining;
1557
1558 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1559 int type = nla_type(nla);
86872cb5
TG
1560
1561 if (type) {
1562 if (type > RTAX_MAX) {
1da177e4
LT
1563 err = -EINVAL;
1564 goto out;
1565 }
86872cb5 1566
defb3519 1567 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1568 }
1da177e4
LT
1569 }
1570 }
1571
d8d1f30b 1572 rt->dst.dev = dev;
1da177e4 1573 rt->rt6i_idev = idev;
c71099ac 1574 rt->rt6i_table = table;
63152fc0 1575
c346dca1 1576 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1577
86872cb5 1578 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1579
1580out:
1581 if (dev)
1582 dev_put(dev);
1583 if (idev)
1584 in6_dev_put(idev);
1585 if (rt)
d8d1f30b 1586 dst_free(&rt->dst);
1da177e4
LT
1587 return err;
1588}
1589
86872cb5 1590static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1591{
1592 int err;
c71099ac 1593 struct fib6_table *table;
d1918542 1594 struct net *net = dev_net(rt->dst.dev);
1da177e4 1595
6825a26c
G
1596 if (rt == net->ipv6.ip6_null_entry) {
1597 err = -ENOENT;
1598 goto out;
1599 }
6c813a72 1600
c71099ac
TG
1601 table = rt->rt6i_table;
1602 write_lock_bh(&table->tb6_lock);
86872cb5 1603 err = fib6_del(rt, info);
c71099ac 1604 write_unlock_bh(&table->tb6_lock);
1da177e4 1605
6825a26c
G
1606out:
1607 dst_release(&rt->dst);
1da177e4
LT
1608 return err;
1609}
1610
e0a1ad73
TG
1611int ip6_del_rt(struct rt6_info *rt)
1612{
4d1169c1 1613 struct nl_info info = {
d1918542 1614 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1615 };
528c4ceb 1616 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1617}
1618
86872cb5 1619static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1620{
c71099ac 1621 struct fib6_table *table;
1da177e4
LT
1622 struct fib6_node *fn;
1623 struct rt6_info *rt;
1624 int err = -ESRCH;
1625
5578689a 1626 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1627 if (!table)
c71099ac
TG
1628 return err;
1629
1630 read_lock_bh(&table->tb6_lock);
1da177e4 1631
c71099ac 1632 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1633 &cfg->fc_dst, cfg->fc_dst_len,
1634 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1635
1da177e4 1636 if (fn) {
d8d1f30b 1637 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1638 if (cfg->fc_ifindex &&
d1918542
DM
1639 (!rt->dst.dev ||
1640 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1641 continue;
86872cb5
TG
1642 if (cfg->fc_flags & RTF_GATEWAY &&
1643 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1644 continue;
86872cb5 1645 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1646 continue;
d8d1f30b 1647 dst_hold(&rt->dst);
c71099ac 1648 read_unlock_bh(&table->tb6_lock);
1da177e4 1649
86872cb5 1650 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1651 }
1652 }
c71099ac 1653 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1654
1655 return err;
1656}
1657
6700c270 1658static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 1659{
e8599ff4 1660 struct net *net = dev_net(skb->dev);
a6279458 1661 struct netevent_redirect netevent;
e8599ff4
DM
1662 struct rt6_info *rt, *nrt = NULL;
1663 const struct in6_addr *target;
e8599ff4 1664 struct ndisc_options ndopts;
6e157b6a
DM
1665 const struct in6_addr *dest;
1666 struct neighbour *old_neigh;
e8599ff4
DM
1667 struct inet6_dev *in6_dev;
1668 struct neighbour *neigh;
1669 struct icmp6hdr *icmph;
6e157b6a
DM
1670 int optlen, on_link;
1671 u8 *lladdr;
e8599ff4
DM
1672
1673 optlen = skb->tail - skb->transport_header;
1674 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1675
1676 if (optlen < 0) {
6e157b6a 1677 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
1678 return;
1679 }
1680
1681 icmph = icmp6_hdr(skb);
1682 target = (const struct in6_addr *) (icmph + 1);
1683 dest = target + 1;
1684
1685 if (ipv6_addr_is_multicast(dest)) {
6e157b6a 1686 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
1687 return;
1688 }
1689
6e157b6a 1690 on_link = 0;
e8599ff4
DM
1691 if (ipv6_addr_equal(dest, target)) {
1692 on_link = 1;
1693 } else if (ipv6_addr_type(target) !=
1694 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 1695 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
1696 return;
1697 }
1698
1699 in6_dev = __in6_dev_get(skb->dev);
1700 if (!in6_dev)
1701 return;
1702 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1703 return;
1704
1705 /* RFC2461 8.1:
1706 * The IP source address of the Redirect MUST be the same as the current
1707 * first-hop router for the specified ICMP Destination Address.
1708 */
1709
1710 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1711 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1712 return;
1713 }
6e157b6a
DM
1714
1715 lladdr = NULL;
e8599ff4
DM
1716 if (ndopts.nd_opts_tgt_lladdr) {
1717 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1718 skb->dev);
1719 if (!lladdr) {
1720 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1721 return;
1722 }
1723 }
1724
6e157b6a
DM
1725 rt = (struct rt6_info *) dst;
1726 if (rt == net->ipv6.ip6_null_entry) {
1727 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 1728 return;
6e157b6a 1729 }
e8599ff4 1730
6e157b6a
DM
1731 /* Redirect received -> path was valid.
1732 * Look, redirects are sent only in response to data packets,
1733 * so that this nexthop apparently is reachable. --ANK
1734 */
1735 dst_confirm(&rt->dst);
a6279458 1736
6e157b6a
DM
1737 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1738 if (!neigh)
1739 return;
a6279458 1740
6e157b6a
DM
1741 /* Duplicate redirect: silently ignore. */
1742 old_neigh = rt->n;
1743 if (neigh == old_neigh)
a6279458 1744 goto out;
1da177e4 1745
1da177e4
LT
1746 /*
1747 * We have finally decided to accept it.
1748 */
1749
1ab1457c 1750 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1751 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1752 NEIGH_UPDATE_F_OVERRIDE|
1753 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1754 NEIGH_UPDATE_F_ISROUTER))
1755 );
1756
21efcfa0 1757 nrt = ip6_rt_copy(rt, dest);
38308473 1758 if (!nrt)
1da177e4
LT
1759 goto out;
1760
1761 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1762 if (on_link)
1763 nrt->rt6i_flags &= ~RTF_GATEWAY;
1764
4e3fd7a0 1765 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
97cac082 1766 nrt->n = neigh_clone(neigh);
1da177e4 1767
40e22e8f 1768 if (ip6_ins_rt(nrt))
1da177e4
LT
1769 goto out;
1770
d8d1f30b 1771 netevent.old = &rt->dst;
1d248b1c 1772 netevent.old_neigh = old_neigh;
d8d1f30b 1773 netevent.new = &nrt->dst;
1d248b1c
DM
1774 netevent.new_neigh = neigh;
1775 netevent.daddr = dest;
8d71740c
TT
1776 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1777
38308473 1778 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 1779 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 1780 ip6_del_rt(rt);
1da177e4
LT
1781 }
1782
1783out:
e8599ff4 1784 neigh_release(neigh);
6e157b6a
DM
1785}
1786
1da177e4
LT
1787/*
1788 * Misc support functions
1789 */
1790
1716a961 1791static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 1792 const struct in6_addr *dest)
1da177e4 1793{
d1918542 1794 struct net *net = dev_net(ort->dst.dev);
8b96d22d
DM
1795 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1796 ort->rt6i_table);
1da177e4
LT
1797
1798 if (rt) {
d8d1f30b
CG
1799 rt->dst.input = ort->dst.input;
1800 rt->dst.output = ort->dst.output;
8e2ec639 1801 rt->dst.flags |= DST_HOST;
d8d1f30b 1802
4e3fd7a0 1803 rt->rt6i_dst.addr = *dest;
8e2ec639 1804 rt->rt6i_dst.plen = 128;
defb3519 1805 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1806 rt->dst.error = ort->dst.error;
1da177e4
LT
1807 rt->rt6i_idev = ort->rt6i_idev;
1808 if (rt->rt6i_idev)
1809 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1810 rt->dst.lastuse = jiffies;
1da177e4 1811
4e3fd7a0 1812 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1813 rt->rt6i_flags = ort->rt6i_flags;
1814 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1815 (RTF_DEFAULT | RTF_ADDRCONF))
1816 rt6_set_from(rt, ort);
1817 else
1818 rt6_clean_expires(rt);
1da177e4
LT
1819 rt->rt6i_metric = 0;
1820
1da177e4
LT
1821#ifdef CONFIG_IPV6_SUBTREES
1822 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1823#endif
0f6c6392 1824 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1825 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1826 }
1827 return rt;
1828}
1829
70ceb4f5 1830#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1831static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1832 const struct in6_addr *prefix, int prefixlen,
1833 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1834{
1835 struct fib6_node *fn;
1836 struct rt6_info *rt = NULL;
c71099ac
TG
1837 struct fib6_table *table;
1838
efa2cea0 1839 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1840 if (!table)
c71099ac 1841 return NULL;
70ceb4f5 1842
5744dd9b 1843 read_lock_bh(&table->tb6_lock);
c71099ac 1844 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1845 if (!fn)
1846 goto out;
1847
d8d1f30b 1848 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1849 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1850 continue;
1851 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1852 continue;
1853 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1854 continue;
d8d1f30b 1855 dst_hold(&rt->dst);
70ceb4f5
YH
1856 break;
1857 }
1858out:
5744dd9b 1859 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1860 return rt;
1861}
1862
efa2cea0 1863static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1864 const struct in6_addr *prefix, int prefixlen,
1865 const struct in6_addr *gwaddr, int ifindex,
95c96174 1866 unsigned int pref)
70ceb4f5 1867{
86872cb5
TG
1868 struct fib6_config cfg = {
1869 .fc_table = RT6_TABLE_INFO,
238fc7ea 1870 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1871 .fc_ifindex = ifindex,
1872 .fc_dst_len = prefixlen,
1873 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1874 RTF_UP | RTF_PREF(pref),
15e47304 1875 .fc_nlinfo.portid = 0,
efa2cea0
DL
1876 .fc_nlinfo.nlh = NULL,
1877 .fc_nlinfo.nl_net = net,
86872cb5
TG
1878 };
1879
4e3fd7a0
AD
1880 cfg.fc_dst = *prefix;
1881 cfg.fc_gateway = *gwaddr;
70ceb4f5 1882
e317da96
YH
1883 /* We should treat it as a default route if prefix length is 0. */
1884 if (!prefixlen)
86872cb5 1885 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1886
86872cb5 1887 ip6_route_add(&cfg);
70ceb4f5 1888
efa2cea0 1889 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1890}
1891#endif
1892
b71d1d42 1893struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1894{
1da177e4 1895 struct rt6_info *rt;
c71099ac 1896 struct fib6_table *table;
1da177e4 1897
c346dca1 1898 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1899 if (!table)
c71099ac 1900 return NULL;
1da177e4 1901
5744dd9b 1902 read_lock_bh(&table->tb6_lock);
d8d1f30b 1903 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
d1918542 1904 if (dev == rt->dst.dev &&
045927ff 1905 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1906 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1907 break;
1908 }
1909 if (rt)
d8d1f30b 1910 dst_hold(&rt->dst);
5744dd9b 1911 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1912 return rt;
1913}
1914
b71d1d42 1915struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1916 struct net_device *dev,
1917 unsigned int pref)
1da177e4 1918{
86872cb5
TG
1919 struct fib6_config cfg = {
1920 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1921 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1922 .fc_ifindex = dev->ifindex,
1923 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1924 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
15e47304 1925 .fc_nlinfo.portid = 0,
5578689a 1926 .fc_nlinfo.nlh = NULL,
c346dca1 1927 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1928 };
1da177e4 1929
4e3fd7a0 1930 cfg.fc_gateway = *gwaddr;
1da177e4 1931
86872cb5 1932 ip6_route_add(&cfg);
1da177e4 1933
1da177e4
LT
1934 return rt6_get_dflt_router(gwaddr, dev);
1935}
1936
7b4da532 1937void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1938{
1939 struct rt6_info *rt;
c71099ac
TG
1940 struct fib6_table *table;
1941
1942 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1943 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1944 if (!table)
c71099ac 1945 return;
1da177e4
LT
1946
1947restart:
c71099ac 1948 read_lock_bh(&table->tb6_lock);
d8d1f30b 1949 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1950 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1951 dst_hold(&rt->dst);
c71099ac 1952 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1953 ip6_del_rt(rt);
1da177e4
LT
1954 goto restart;
1955 }
1956 }
c71099ac 1957 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1958}
1959
5578689a
DL
1960static void rtmsg_to_fib6_config(struct net *net,
1961 struct in6_rtmsg *rtmsg,
86872cb5
TG
1962 struct fib6_config *cfg)
1963{
1964 memset(cfg, 0, sizeof(*cfg));
1965
1966 cfg->fc_table = RT6_TABLE_MAIN;
1967 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1968 cfg->fc_metric = rtmsg->rtmsg_metric;
1969 cfg->fc_expires = rtmsg->rtmsg_info;
1970 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1971 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1972 cfg->fc_flags = rtmsg->rtmsg_flags;
1973
5578689a 1974 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1975
4e3fd7a0
AD
1976 cfg->fc_dst = rtmsg->rtmsg_dst;
1977 cfg->fc_src = rtmsg->rtmsg_src;
1978 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
1979}
1980
5578689a 1981int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1982{
86872cb5 1983 struct fib6_config cfg;
1da177e4
LT
1984 struct in6_rtmsg rtmsg;
1985 int err;
1986
1987 switch(cmd) {
1988 case SIOCADDRT: /* Add a route */
1989 case SIOCDELRT: /* Delete a route */
1990 if (!capable(CAP_NET_ADMIN))
1991 return -EPERM;
1992 err = copy_from_user(&rtmsg, arg,
1993 sizeof(struct in6_rtmsg));
1994 if (err)
1995 return -EFAULT;
86872cb5 1996
5578689a 1997 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1998
1da177e4
LT
1999 rtnl_lock();
2000 switch (cmd) {
2001 case SIOCADDRT:
86872cb5 2002 err = ip6_route_add(&cfg);
1da177e4
LT
2003 break;
2004 case SIOCDELRT:
86872cb5 2005 err = ip6_route_del(&cfg);
1da177e4
LT
2006 break;
2007 default:
2008 err = -EINVAL;
2009 }
2010 rtnl_unlock();
2011
2012 return err;
3ff50b79 2013 }
1da177e4
LT
2014
2015 return -EINVAL;
2016}
2017
2018/*
2019 * Drop the packet on the floor
2020 */
2021
d5fdd6ba 2022static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2023{
612f09e8 2024 int type;
adf30907 2025 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2026 switch (ipstats_mib_noroutes) {
2027 case IPSTATS_MIB_INNOROUTES:
0660e03f 2028 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2029 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2030 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2031 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2032 break;
2033 }
2034 /* FALLTHROUGH */
2035 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2036 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2037 ipstats_mib_noroutes);
612f09e8
YH
2038 break;
2039 }
3ffe533c 2040 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2041 kfree_skb(skb);
2042 return 0;
2043}
2044
9ce8ade0
TG
2045static int ip6_pkt_discard(struct sk_buff *skb)
2046{
612f09e8 2047 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2048}
2049
20380731 2050static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2051{
adf30907 2052 skb->dev = skb_dst(skb)->dev;
612f09e8 2053 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2054}
2055
6723ab54
DM
2056#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2057
9ce8ade0
TG
2058static int ip6_pkt_prohibit(struct sk_buff *skb)
2059{
612f09e8 2060 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2061}
2062
2063static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2064{
adf30907 2065 skb->dev = skb_dst(skb)->dev;
612f09e8 2066 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2067}
2068
6723ab54
DM
2069#endif
2070
1da177e4
LT
2071/*
2072 * Allocate a dst for local (unicast / anycast) address.
2073 */
2074
2075struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2076 const struct in6_addr *addr,
8f031519 2077 bool anycast)
1da177e4 2078{
c346dca1 2079 struct net *net = dev_net(idev->dev);
8b96d22d 2080 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
f83c7790 2081 int err;
1da177e4 2082
38308473 2083 if (!rt) {
f3213831 2084 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
1da177e4 2085 return ERR_PTR(-ENOMEM);
40385653 2086 }
1da177e4 2087
1da177e4
LT
2088 in6_dev_hold(idev);
2089
11d53b49 2090 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2091 rt->dst.input = ip6_input;
2092 rt->dst.output = ip6_output;
1da177e4 2093 rt->rt6i_idev = idev;
1da177e4
LT
2094
2095 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2096 if (anycast)
2097 rt->rt6i_flags |= RTF_ANYCAST;
2098 else
1da177e4 2099 rt->rt6i_flags |= RTF_LOCAL;
8ade06c6 2100 err = rt6_bind_neighbour(rt, rt->dst.dev);
f83c7790 2101 if (err) {
d8d1f30b 2102 dst_free(&rt->dst);
f83c7790 2103 return ERR_PTR(err);
1da177e4
LT
2104 }
2105
4e3fd7a0 2106 rt->rt6i_dst.addr = *addr;
1da177e4 2107 rt->rt6i_dst.plen = 128;
5578689a 2108 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2109
d8d1f30b 2110 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2111
2112 return rt;
2113}
2114
c3968a85
DW
2115int ip6_route_get_saddr(struct net *net,
2116 struct rt6_info *rt,
b71d1d42 2117 const struct in6_addr *daddr,
c3968a85
DW
2118 unsigned int prefs,
2119 struct in6_addr *saddr)
2120{
2121 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2122 int err = 0;
2123 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2124 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2125 else
2126 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2127 daddr, prefs, saddr);
2128 return err;
2129}
2130
2131/* remove deleted ip from prefsrc entries */
/* Walker argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;	/* restrict to this device; NULL matches any */
	struct net *net;	/* namespace whose null entry is skipped */
	struct in6_addr *addr;	/* address compared against rt6i_prefsrc */
};
2137
2138static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2139{
2140 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2141 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2142 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2143
d1918542 2144 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2145 rt != net->ipv6.ip6_null_entry &&
2146 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2147 /* remove prefsrc entry */
2148 rt->rt6i_prefsrc.plen = 0;
2149 }
2150 return 0;
2151}
2152
2153void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2154{
2155 struct net *net = dev_net(ifp->idev->dev);
2156 struct arg_dev_net_ip adni = {
2157 .dev = ifp->idev->dev,
2158 .net = net,
2159 .addr = &ifp->addr,
2160 };
2161 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2162}
2163
8ed67789
DL
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device to match; NULL matches any */
	struct net *net;	/* namespace whose null entry is skipped */
};
2168
1da177e4
LT
2169static int fib6_ifdown(struct rt6_info *rt, void *arg)
2170{
bc3ef660 2171 const struct arg_dev_net *adn = arg;
2172 const struct net_device *dev = adn->dev;
8ed67789 2173
d1918542 2174 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2175 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2176 return -1;
c159d30c 2177
1da177e4
LT
2178 return 0;
2179}
2180
f3db4851 2181void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2182{
8ed67789
DL
2183 struct arg_dev_net adn = {
2184 .dev = dev,
2185 .net = net,
2186 };
2187
2188 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2189 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2190}
2191
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
2196
2197static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2198{
2199 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2200 struct inet6_dev *idev;
2201
2202 /* In IPv6 pmtu discovery is not optional,
2203 so that RTAX_MTU lock cannot disable it.
2204 We still use this lock to block changes
2205 caused by addrconf/ndisc.
2206 */
2207
2208 idev = __in6_dev_get(arg->dev);
38308473 2209 if (!idev)
1da177e4
LT
2210 return 0;
2211
2212 /* For administrative MTU increase, there is no way to discover
2213 IPv6 PMTU increase, so PMTU increase should be updated here.
2214 Since RFC 1981 doesn't include administrative MTU increase
2215 update PMTU increase is a MUST. (i.e. jumbo frame)
2216 */
2217 /*
2218 If new MTU is less than route PMTU, this new MTU will be the
2219 lowest MTU in the path, update the route PMTU to reflect PMTU
2220 decreases; if new MTU is greater than route PMTU, and the
2221 old MTU is the lowest MTU in the path, update the route PMTU
2222 to reflect the increase. In this case if the other nodes' MTU
2223 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2224 PMTU discouvery.
2225 */
d1918542 2226 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2227 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2228 (dst_mtu(&rt->dst) >= arg->mtu ||
2229 (dst_mtu(&rt->dst) < arg->mtu &&
2230 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2231 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2232 }
1da177e4
LT
2233 return 0;
2234}
2235
95c96174 2236void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2237{
c71099ac
TG
2238 struct rt6_mtu_change_arg arg = {
2239 .dev = dev,
2240 .mtu = mtu,
2241 };
1da177e4 2242
c346dca1 2243 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2244}
2245
ef7c79ed 2246static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2247 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2248 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2249 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2250 [RTA_PRIORITY] = { .type = NLA_U32 },
2251 [RTA_METRICS] = { .type = NLA_NESTED },
2252};
2253
2254static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2255 struct fib6_config *cfg)
1da177e4 2256{
86872cb5
TG
2257 struct rtmsg *rtm;
2258 struct nlattr *tb[RTA_MAX+1];
2259 int err;
1da177e4 2260
86872cb5
TG
2261 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2262 if (err < 0)
2263 goto errout;
1da177e4 2264
86872cb5
TG
2265 err = -EINVAL;
2266 rtm = nlmsg_data(nlh);
2267 memset(cfg, 0, sizeof(*cfg));
2268
2269 cfg->fc_table = rtm->rtm_table;
2270 cfg->fc_dst_len = rtm->rtm_dst_len;
2271 cfg->fc_src_len = rtm->rtm_src_len;
2272 cfg->fc_flags = RTF_UP;
2273 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2274 cfg->fc_type = rtm->rtm_type;
86872cb5 2275
ef2c7d7b
ND
2276 if (rtm->rtm_type == RTN_UNREACHABLE ||
2277 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2278 rtm->rtm_type == RTN_PROHIBIT ||
2279 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2280 cfg->fc_flags |= RTF_REJECT;
2281
ab79ad14
2282 if (rtm->rtm_type == RTN_LOCAL)
2283 cfg->fc_flags |= RTF_LOCAL;
2284
15e47304 2285 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2286 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2287 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2288
2289 if (tb[RTA_GATEWAY]) {
2290 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2291 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2292 }
86872cb5
TG
2293
2294 if (tb[RTA_DST]) {
2295 int plen = (rtm->rtm_dst_len + 7) >> 3;
2296
2297 if (nla_len(tb[RTA_DST]) < plen)
2298 goto errout;
2299
2300 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2301 }
86872cb5
TG
2302
2303 if (tb[RTA_SRC]) {
2304 int plen = (rtm->rtm_src_len + 7) >> 3;
2305
2306 if (nla_len(tb[RTA_SRC]) < plen)
2307 goto errout;
2308
2309 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2310 }
86872cb5 2311
c3968a85
DW
2312 if (tb[RTA_PREFSRC])
2313 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2314
86872cb5
TG
2315 if (tb[RTA_OIF])
2316 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2317
2318 if (tb[RTA_PRIORITY])
2319 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2320
2321 if (tb[RTA_METRICS]) {
2322 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2323 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2324 }
86872cb5
TG
2325
2326 if (tb[RTA_TABLE])
2327 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2328
2329 err = 0;
2330errout:
2331 return err;
1da177e4
LT
2332}
2333
c127ea2c 2334static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2335{
86872cb5
TG
2336 struct fib6_config cfg;
2337 int err;
1da177e4 2338
86872cb5
TG
2339 err = rtm_to_fib6_config(skb, nlh, &cfg);
2340 if (err < 0)
2341 return err;
2342
2343 return ip6_route_del(&cfg);
1da177e4
LT
2344}
2345
c127ea2c 2346static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2347{
86872cb5
TG
2348 struct fib6_config cfg;
2349 int err;
1da177e4 2350
86872cb5
TG
2351 err = rtm_to_fib6_config(skb, nlh, &cfg);
2352 if (err < 0)
2353 return err;
2354
2355 return ip6_route_add(&cfg);
1da177e4
LT
2356}
2357
339bf98f
TG
2358static inline size_t rt6_nlmsg_size(void)
2359{
2360 return NLMSG_ALIGN(sizeof(struct rtmsg))
2361 + nla_total_size(16) /* RTA_SRC */
2362 + nla_total_size(16) /* RTA_DST */
2363 + nla_total_size(16) /* RTA_GATEWAY */
2364 + nla_total_size(16) /* RTA_PREFSRC */
2365 + nla_total_size(4) /* RTA_TABLE */
2366 + nla_total_size(4) /* RTA_IIF */
2367 + nla_total_size(4) /* RTA_OIF */
2368 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2369 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2370 + nla_total_size(sizeof(struct rta_cacheinfo));
2371}
2372
191cd582
BH
2373static int rt6_fill_node(struct net *net,
2374 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 2375 struct in6_addr *dst, struct in6_addr *src,
15e47304 2376 int iif, int type, u32 portid, u32 seq,
7bc570c8 2377 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2378{
2379 struct rtmsg *rtm;
2d7202bf 2380 struct nlmsghdr *nlh;
e3703b3d 2381 long expires;
9e762a4a 2382 u32 table;
f2c31e32 2383 struct neighbour *n;
1da177e4
LT
2384
2385 if (prefix) { /* user wants prefix routes only */
2386 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2387 /* success since this is not a prefix route */
2388 return 1;
2389 }
2390 }
2391
15e47304 2392 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 2393 if (!nlh)
26932566 2394 return -EMSGSIZE;
2d7202bf
TG
2395
2396 rtm = nlmsg_data(nlh);
1da177e4
LT
2397 rtm->rtm_family = AF_INET6;
2398 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2399 rtm->rtm_src_len = rt->rt6i_src.plen;
2400 rtm->rtm_tos = 0;
c71099ac 2401 if (rt->rt6i_table)
9e762a4a 2402 table = rt->rt6i_table->tb6_id;
c71099ac 2403 else
9e762a4a
PM
2404 table = RT6_TABLE_UNSPEC;
2405 rtm->rtm_table = table;
c78679e8
DM
2406 if (nla_put_u32(skb, RTA_TABLE, table))
2407 goto nla_put_failure;
ef2c7d7b
ND
2408 if (rt->rt6i_flags & RTF_REJECT) {
2409 switch (rt->dst.error) {
2410 case -EINVAL:
2411 rtm->rtm_type = RTN_BLACKHOLE;
2412 break;
2413 case -EACCES:
2414 rtm->rtm_type = RTN_PROHIBIT;
2415 break;
b4949ab2
ND
2416 case -EAGAIN:
2417 rtm->rtm_type = RTN_THROW;
2418 break;
ef2c7d7b
ND
2419 default:
2420 rtm->rtm_type = RTN_UNREACHABLE;
2421 break;
2422 }
2423 }
38308473 2424 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2425 rtm->rtm_type = RTN_LOCAL;
d1918542 2426 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2427 rtm->rtm_type = RTN_LOCAL;
2428 else
2429 rtm->rtm_type = RTN_UNICAST;
2430 rtm->rtm_flags = 0;
2431 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2432 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2433 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4 2434 rtm->rtm_protocol = RTPROT_REDIRECT;
f0396f60
DO
2435 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2436 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2437 rtm->rtm_protocol = RTPROT_RA;
2438 else
2439 rtm->rtm_protocol = RTPROT_KERNEL;
2440 }
1da177e4 2441
38308473 2442 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2443 rtm->rtm_flags |= RTM_F_CLONED;
2444
2445 if (dst) {
c78679e8
DM
2446 if (nla_put(skb, RTA_DST, 16, dst))
2447 goto nla_put_failure;
1ab1457c 2448 rtm->rtm_dst_len = 128;
1da177e4 2449 } else if (rtm->rtm_dst_len)
c78679e8
DM
2450 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2451 goto nla_put_failure;
1da177e4
LT
2452#ifdef CONFIG_IPV6_SUBTREES
2453 if (src) {
c78679e8
DM
2454 if (nla_put(skb, RTA_SRC, 16, src))
2455 goto nla_put_failure;
1ab1457c 2456 rtm->rtm_src_len = 128;
c78679e8
DM
2457 } else if (rtm->rtm_src_len &&
2458 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2459 goto nla_put_failure;
1da177e4 2460#endif
7bc570c8
YH
2461 if (iif) {
2462#ifdef CONFIG_IPV6_MROUTE
2463 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2464 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2465 if (err <= 0) {
2466 if (!nowait) {
2467 if (err == 0)
2468 return 0;
2469 goto nla_put_failure;
2470 } else {
2471 if (err == -EMSGSIZE)
2472 goto nla_put_failure;
2473 }
2474 }
2475 } else
2476#endif
c78679e8
DM
2477 if (nla_put_u32(skb, RTA_IIF, iif))
2478 goto nla_put_failure;
7bc570c8 2479 } else if (dst) {
1da177e4 2480 struct in6_addr saddr_buf;
c78679e8
DM
2481 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2482 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2483 goto nla_put_failure;
1da177e4 2484 }
2d7202bf 2485
c3968a85
DW
2486 if (rt->rt6i_prefsrc.plen) {
2487 struct in6_addr saddr_buf;
4e3fd7a0 2488 saddr_buf = rt->rt6i_prefsrc.addr;
c78679e8
DM
2489 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2490 goto nla_put_failure;
c3968a85
DW
2491 }
2492
defb3519 2493 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2494 goto nla_put_failure;
2495
97cac082 2496 n = rt->n;
94f826b8 2497 if (n) {
fdd6681d 2498 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
94f826b8 2499 goto nla_put_failure;
94f826b8 2500 }
2d7202bf 2501
c78679e8
DM
2502 if (rt->dst.dev &&
2503 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2504 goto nla_put_failure;
2505 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2506 goto nla_put_failure;
8253947e
LW
2507
2508 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 2509
87a50699 2510 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 2511 goto nla_put_failure;
2d7202bf
TG
2512
2513 return nlmsg_end(skb, nlh);
2514
2515nla_put_failure:
26932566
PM
2516 nlmsg_cancel(skb, nlh);
2517 return -EMSGSIZE;
1da177e4
LT
2518}
2519
1b43af54 2520int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2521{
2522 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2523 int prefix;
2524
2d7202bf
TG
2525 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2526 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2527 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2528 } else
2529 prefix = 0;
2530
191cd582
BH
2531 return rt6_fill_node(arg->net,
2532 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 2533 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2534 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2535}
2536
c127ea2c 2537static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2538{
3b1e0a65 2539 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2540 struct nlattr *tb[RTA_MAX+1];
2541 struct rt6_info *rt;
1da177e4 2542 struct sk_buff *skb;
ab364a6f 2543 struct rtmsg *rtm;
4c9483b2 2544 struct flowi6 fl6;
72331bc0 2545 int err, iif = 0, oif = 0;
1da177e4 2546
ab364a6f
TG
2547 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2548 if (err < 0)
2549 goto errout;
1da177e4 2550
ab364a6f 2551 err = -EINVAL;
4c9483b2 2552 memset(&fl6, 0, sizeof(fl6));
1da177e4 2553
ab364a6f
TG
2554 if (tb[RTA_SRC]) {
2555 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2556 goto errout;
2557
4e3fd7a0 2558 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2559 }
2560
2561 if (tb[RTA_DST]) {
2562 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2563 goto errout;
2564
4e3fd7a0 2565 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2566 }
2567
2568 if (tb[RTA_IIF])
2569 iif = nla_get_u32(tb[RTA_IIF]);
2570
2571 if (tb[RTA_OIF])
72331bc0 2572 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2573
2574 if (iif) {
2575 struct net_device *dev;
72331bc0
SL
2576 int flags = 0;
2577
5578689a 2578 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2579 if (!dev) {
2580 err = -ENODEV;
ab364a6f 2581 goto errout;
1da177e4 2582 }
72331bc0
SL
2583
2584 fl6.flowi6_iif = iif;
2585
2586 if (!ipv6_addr_any(&fl6.saddr))
2587 flags |= RT6_LOOKUP_F_HAS_SADDR;
2588
2589 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2590 flags);
2591 } else {
2592 fl6.flowi6_oif = oif;
2593
2594 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
2595 }
2596
ab364a6f 2597 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2598 if (!skb) {
2173bff5 2599 dst_release(&rt->dst);
ab364a6f
TG
2600 err = -ENOBUFS;
2601 goto errout;
2602 }
1da177e4 2603
ab364a6f
TG
2604 /* Reserve room for dummy headers, this skb can pass
2605 through good chunk of routing engine.
2606 */
459a98ed 2607 skb_reset_mac_header(skb);
ab364a6f 2608 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2609
d8d1f30b 2610 skb_dst_set(skb, &rt->dst);
1da177e4 2611
4c9483b2 2612 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
15e47304 2613 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
7bc570c8 2614 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2615 if (err < 0) {
ab364a6f
TG
2616 kfree_skb(skb);
2617 goto errout;
1da177e4
LT
2618 }
2619
15e47304 2620 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 2621errout:
1da177e4 2622 return err;
1da177e4
LT
2623}
2624
86872cb5 2625void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2626{
2627 struct sk_buff *skb;
5578689a 2628 struct net *net = info->nl_net;
528c4ceb
DL
2629 u32 seq;
2630 int err;
2631
2632 err = -ENOBUFS;
38308473 2633 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2634
339bf98f 2635 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2636 if (!skb)
21713ebc
TG
2637 goto errout;
2638
191cd582 2639 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
15e47304 2640 event, info->portid, seq, 0, 0, 0);
26932566
PM
2641 if (err < 0) {
2642 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2643 WARN_ON(err == -EMSGSIZE);
2644 kfree_skb(skb);
2645 goto errout;
2646 }
15e47304 2647 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
2648 info->nlh, gfp_any());
2649 return;
21713ebc
TG
2650errout:
2651 if (err < 0)
5578689a 2652 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2653}
2654
8ed67789
DL
2655static int ip6_route_dev_notify(struct notifier_block *this,
2656 unsigned long event, void *data)
2657{
2658 struct net_device *dev = (struct net_device *)data;
c346dca1 2659 struct net *net = dev_net(dev);
8ed67789
DL
2660
2661 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2662 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2663 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2664#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2665 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2666 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2667 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2668 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2669#endif
2670 }
2671
2672 return NOTIFY_OK;
2673}
2674
1da177e4
LT
2675/*
2676 * /proc
2677 */
2678
2679#ifdef CONFIG_PROC_FS
2680
1da177e4
LT
/* NOTE(review): no user of this struct is visible in this part of the file. */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2689
2690static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2691{
33120b30 2692 struct seq_file *m = p_arg;
69cce1d1 2693 struct neighbour *n;
1da177e4 2694
4b7a4274 2695 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2696
2697#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2698 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2699#else
33120b30 2700 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2701#endif
97cac082 2702 n = rt->n;
69cce1d1
DM
2703 if (n) {
2704 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2705 } else {
33120b30 2706 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2707 }
33120b30 2708 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2709 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2710 rt->dst.__use, rt->rt6i_flags,
d1918542 2711 rt->dst.dev ? rt->dst.dev->name : "");
1da177e4
LT
2712 return 0;
2713}
2714
33120b30 2715static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2716{
f3db4851 2717 struct net *net = (struct net *)m->private;
32b293a5 2718 fib6_clean_all_ro(net, rt6_info_route, 0, m);
33120b30
AD
2719 return 0;
2720}
1da177e4 2721
33120b30
AD
2722static int ipv6_route_open(struct inode *inode, struct file *file)
2723{
de05c557 2724 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2725}
2726
33120b30
AD
2727static const struct file_operations ipv6_route_proc_fops = {
2728 .owner = THIS_MODULE,
2729 .open = ipv6_route_open,
2730 .read = seq_read,
2731 .llseek = seq_lseek,
b6fcbdb4 2732 .release = single_release_net,
33120b30
AD
2733};
2734
1da177e4
LT
2735static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2736{
69ddb805 2737 struct net *net = (struct net *)seq->private;
1da177e4 2738 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2739 net->ipv6.rt6_stats->fib_nodes,
2740 net->ipv6.rt6_stats->fib_route_nodes,
2741 net->ipv6.rt6_stats->fib_rt_alloc,
2742 net->ipv6.rt6_stats->fib_rt_entries,
2743 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2744 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2745 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2746
2747 return 0;
2748}
2749
2750static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2751{
de05c557 2752 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2753}
2754
9a32144e 2755static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2756 .owner = THIS_MODULE,
2757 .open = rt6_stats_seq_open,
2758 .read = seq_read,
2759 .llseek = seq_lseek,
b6fcbdb4 2760 .release = single_release_net,
1da177e4
LT
2761};
2762#endif /* CONFIG_PROC_FS */
2763
2764#ifdef CONFIG_SYSCTL
2765
1da177e4 2766static
8d65af78 2767int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2768 void __user *buffer, size_t *lenp, loff_t *ppos)
2769{
c486da34
LAG
2770 struct net *net;
2771 int delay;
2772 if (!write)
1da177e4 2773 return -EINVAL;
c486da34
LAG
2774
2775 net = (struct net *)ctl->extra1;
2776 delay = net->ipv6.sysctl.flush_delay;
2777 proc_dointvec(ctl, write, buffer, lenp, ppos);
2778 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2779 return 0;
1da177e4
LT
2780}
2781
760f2d01 2782ctl_table ipv6_route_table_template[] = {
1ab1457c 2783 {
1da177e4 2784 .procname = "flush",
4990509f 2785 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2786 .maxlen = sizeof(int),
89c8b3a1 2787 .mode = 0200,
6d9f239a 2788 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2789 },
2790 {
1da177e4 2791 .procname = "gc_thresh",
9a7ec3a9 2792 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2793 .maxlen = sizeof(int),
2794 .mode = 0644,
6d9f239a 2795 .proc_handler = proc_dointvec,
1da177e4
LT
2796 },
2797 {
1da177e4 2798 .procname = "max_size",
4990509f 2799 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2800 .maxlen = sizeof(int),
2801 .mode = 0644,
6d9f239a 2802 .proc_handler = proc_dointvec,
1da177e4
LT
2803 },
2804 {
1da177e4 2805 .procname = "gc_min_interval",
4990509f 2806 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2807 .maxlen = sizeof(int),
2808 .mode = 0644,
6d9f239a 2809 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2810 },
2811 {
1da177e4 2812 .procname = "gc_timeout",
4990509f 2813 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2814 .maxlen = sizeof(int),
2815 .mode = 0644,
6d9f239a 2816 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2817 },
2818 {
1da177e4 2819 .procname = "gc_interval",
4990509f 2820 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2821 .maxlen = sizeof(int),
2822 .mode = 0644,
6d9f239a 2823 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2824 },
2825 {
1da177e4 2826 .procname = "gc_elasticity",
4990509f 2827 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2828 .maxlen = sizeof(int),
2829 .mode = 0644,
f3d3f616 2830 .proc_handler = proc_dointvec,
1da177e4
LT
2831 },
2832 {
1da177e4 2833 .procname = "mtu_expires",
4990509f 2834 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2835 .maxlen = sizeof(int),
2836 .mode = 0644,
6d9f239a 2837 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2838 },
2839 {
1da177e4 2840 .procname = "min_adv_mss",
4990509f 2841 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2842 .maxlen = sizeof(int),
2843 .mode = 0644,
f3d3f616 2844 .proc_handler = proc_dointvec,
1da177e4
LT
2845 },
2846 {
1da177e4 2847 .procname = "gc_min_interval_ms",
4990509f 2848 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2849 .maxlen = sizeof(int),
2850 .mode = 0644,
6d9f239a 2851 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2852 },
f8572d8f 2853 { }
1da177e4
LT
2854};
2855
2c8c1e72 2856struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2857{
2858 struct ctl_table *table;
2859
2860 table = kmemdup(ipv6_route_table_template,
2861 sizeof(ipv6_route_table_template),
2862 GFP_KERNEL);
5ee09105
YH
2863
2864 if (table) {
2865 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2866 table[0].extra1 = net;
86393e52 2867 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2868 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2869 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2870 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2871 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2872 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2873 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2874 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2875 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2876 }
2877
760f2d01
DL
2878 return table;
2879}
1da177e4
LT
2880#endif
2881
2c8c1e72 2882static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2883{
633d424b 2884 int ret = -ENOMEM;
8ed67789 2885
86393e52
AD
2886 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2887 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2888
fc66f95c
ED
2889 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2890 goto out_ip6_dst_ops;
2891
8ed67789
DL
2892 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2893 sizeof(*net->ipv6.ip6_null_entry),
2894 GFP_KERNEL);
2895 if (!net->ipv6.ip6_null_entry)
fc66f95c 2896 goto out_ip6_dst_entries;
d8d1f30b 2897 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2898 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2899 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2900 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2901 ip6_template_metrics, true);
8ed67789
DL
2902
2903#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2904 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2905 sizeof(*net->ipv6.ip6_prohibit_entry),
2906 GFP_KERNEL);
68fffc67
PZ
2907 if (!net->ipv6.ip6_prohibit_entry)
2908 goto out_ip6_null_entry;
d8d1f30b 2909 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2910 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2911 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2912 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2913 ip6_template_metrics, true);
8ed67789
DL
2914
2915 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2916 sizeof(*net->ipv6.ip6_blk_hole_entry),
2917 GFP_KERNEL);
68fffc67
PZ
2918 if (!net->ipv6.ip6_blk_hole_entry)
2919 goto out_ip6_prohibit_entry;
d8d1f30b 2920 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2921 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2922 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2923 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2924 ip6_template_metrics, true);
8ed67789
DL
2925#endif
2926
b339a47c
PZ
2927 net->ipv6.sysctl.flush_delay = 0;
2928 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2929 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2930 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2931 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2932 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2933 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2934 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2935
6891a346
BT
2936 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2937
8ed67789
DL
2938 ret = 0;
2939out:
2940 return ret;
f2fc6a54 2941
68fffc67
PZ
2942#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2943out_ip6_prohibit_entry:
2944 kfree(net->ipv6.ip6_prohibit_entry);
2945out_ip6_null_entry:
2946 kfree(net->ipv6.ip6_null_entry);
2947#endif
fc66f95c
ED
2948out_ip6_dst_entries:
2949 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2950out_ip6_dst_ops:
f2fc6a54 2951 goto out;
cdb18761
DL
2952}
2953
2c8c1e72 2954static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 2955{
8ed67789
DL
2956 kfree(net->ipv6.ip6_null_entry);
2957#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2958 kfree(net->ipv6.ip6_prohibit_entry);
2959 kfree(net->ipv6.ip6_blk_hole_entry);
2960#endif
41bb78b4 2961 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2962}
2963
d189634e
TG
2964static int __net_init ip6_route_net_init_late(struct net *net)
2965{
2966#ifdef CONFIG_PROC_FS
2967 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2968 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2969#endif
2970 return 0;
2971}
2972
2973static void __net_exit ip6_route_net_exit_late(struct net *net)
2974{
2975#ifdef CONFIG_PROC_FS
2976 proc_net_remove(net, "ipv6_route");
2977 proc_net_remove(net, "rt6_stats");
2978#endif
2979}
2980
cdb18761
DL
2981static struct pernet_operations ip6_route_net_ops = {
2982 .init = ip6_route_net_init,
2983 .exit = ip6_route_net_exit,
2984};
2985
c3426b47
DM
2986static int __net_init ipv6_inetpeer_init(struct net *net)
2987{
2988 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2989
2990 if (!bp)
2991 return -ENOMEM;
2992 inet_peer_base_init(bp);
2993 net->ipv6.peers = bp;
2994 return 0;
2995}
2996
2997static void __net_exit ipv6_inetpeer_exit(struct net *net)
2998{
2999 struct inet_peer_base *bp = net->ipv6.peers;
3000
3001 net->ipv6.peers = NULL;
56a6b248 3002 inetpeer_invalidate_tree(bp);
c3426b47
DM
3003 kfree(bp);
3004}
3005
2b823f72 3006static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3007 .init = ipv6_inetpeer_init,
3008 .exit = ipv6_inetpeer_exit,
3009};
3010
d189634e
TG
3011static struct pernet_operations ip6_route_net_late_ops = {
3012 .init = ip6_route_net_init_late,
3013 .exit = ip6_route_net_exit_late,
3014};
3015
8ed67789
DL
3016static struct notifier_block ip6_route_dev_notifier = {
3017 .notifier_call = ip6_route_dev_notify,
3018 .priority = 0,
3019};
3020
433d49c3 3021int __init ip6_route_init(void)
1da177e4 3022{
433d49c3
DL
3023 int ret;
3024
9a7ec3a9
DL
3025 ret = -ENOMEM;
3026 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3027 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3028 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3029 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3030 goto out;
14e50e57 3031
fc66f95c 3032 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3033 if (ret)
bdb3289f 3034 goto out_kmem_cache;
bdb3289f 3035
c3426b47
DM
3036 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3037 if (ret)
e8803b6c 3038 goto out_dst_entries;
2a0c451a 3039
7e52b33b
DM
3040 ret = register_pernet_subsys(&ip6_route_net_ops);
3041 if (ret)
3042 goto out_register_inetpeer;
c3426b47 3043
5dc121e9
AE
3044 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3045
8ed67789
DL
3046 /* Registering of the loopback is done before this portion of code,
3047 * the loopback reference in rt6_info will not be taken, do it
3048 * manually for init_net */
d8d1f30b 3049 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3050 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3051 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3052 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3053 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3054 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3055 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3056 #endif
e8803b6c 3057 ret = fib6_init();
433d49c3 3058 if (ret)
8ed67789 3059 goto out_register_subsys;
433d49c3 3060
433d49c3
DL
3061 ret = xfrm6_init();
3062 if (ret)
e8803b6c 3063 goto out_fib6_init;
c35b7e72 3064
433d49c3
DL
3065 ret = fib6_rules_init();
3066 if (ret)
3067 goto xfrm6_init;
7e5449c2 3068
d189634e
TG
3069 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3070 if (ret)
3071 goto fib6_rules_init;
3072
433d49c3 3073 ret = -ENOBUFS;
c7ac8679
GR
3074 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3075 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3076 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3077 goto out_register_late_subsys;
c127ea2c 3078
8ed67789 3079 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3080 if (ret)
d189634e 3081 goto out_register_late_subsys;
8ed67789 3082
433d49c3
DL
3083out:
3084 return ret;
3085
d189634e
TG
3086out_register_late_subsys:
3087 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3088fib6_rules_init:
433d49c3
DL
3089 fib6_rules_cleanup();
3090xfrm6_init:
433d49c3 3091 xfrm6_fini();
2a0c451a
TG
3092out_fib6_init:
3093 fib6_gc_cleanup();
8ed67789
DL
3094out_register_subsys:
3095 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3096out_register_inetpeer:
3097 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3098out_dst_entries:
3099 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3100out_kmem_cache:
f2fc6a54 3101 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3102 goto out;
1da177e4
LT
3103}
3104
3105void ip6_route_cleanup(void)
3106{
8ed67789 3107 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3108 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3109 fib6_rules_cleanup();
1da177e4 3110 xfrm6_fini();
1da177e4 3111 fib6_gc_cleanup();
c3426b47 3112 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3113 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3114 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3115 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3116}