]> git.ipfire.org Git - thirdparty/kernel/stable.git/blame - net/ipv6/route.c
Merge branch 'for-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetoot...
[thirdparty/kernel/stable.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
b811580d 66#include <trace/events/fib6.h>
1da177e4 67
7c0f6ba6 68#include <linux/uaccess.h>
1da177e4
LT
69
70#ifdef CONFIG_SYSCTL
71#include <linux/sysctl.h>
72#endif
73
/*
 * Outcome of the next-hop neighbour check (see rt6_check_neigh()).
 * Every failure code is negative; callers test for "< 0" or compare
 * against a specific failure value.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1
};
80
83a09abd 81static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
1da177e4 82static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 83static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 84static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
85static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86static void ip6_dst_destroy(struct dst_entry *);
87static void ip6_dst_ifdown(struct dst_entry *,
88 struct net_device *dev, int how);
569d3645 89static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
90
91static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 92static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 93static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 94static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 95static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
96static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
97 struct sk_buff *skb, u32 mtu);
98static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
99 struct sk_buff *skb);
4b32b5ad 100static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 101static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
16a16cd3
DA
102static size_t rt6_nlmsg_size(struct rt6_info *rt);
103static int rt6_fill_node(struct net *net,
104 struct sk_buff *skb, struct rt6_info *rt,
105 struct in6_addr *dst, struct in6_addr *src,
106 int iif, int type, u32 portid, u32 seq,
107 unsigned int flags);
35732d01
WW
108static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
109 struct in6_addr *daddr,
110 struct in6_addr *saddr);
1da177e4 111
70ceb4f5 112#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 113static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 114 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
115 const struct in6_addr *gwaddr,
116 struct net_device *dev,
95c96174 117 unsigned int pref);
efa2cea0 118static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 119 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
120 const struct in6_addr *gwaddr,
121 struct net_device *dev);
70ceb4f5
YH
122#endif
123
8d0b94af
MKL
124struct uncached_list {
125 spinlock_t lock;
126 struct list_head head;
127};
128
129static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
130
131static void rt6_uncached_list_add(struct rt6_info *rt)
132{
133 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
134
8d0b94af
MKL
135 rt->rt6i_uncached_list = ul;
136
137 spin_lock_bh(&ul->lock);
138 list_add_tail(&rt->rt6i_uncached, &ul->head);
139 spin_unlock_bh(&ul->lock);
140}
141
142static void rt6_uncached_list_del(struct rt6_info *rt)
143{
144 if (!list_empty(&rt->rt6i_uncached)) {
145 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 146 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
147
148 spin_lock_bh(&ul->lock);
149 list_del(&rt->rt6i_uncached);
81eb8447 150 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
151 spin_unlock_bh(&ul->lock);
152 }
153}
154
155static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
156{
157 struct net_device *loopback_dev = net->loopback_dev;
158 int cpu;
159
e332bc67
EB
160 if (dev == loopback_dev)
161 return;
162
8d0b94af
MKL
163 for_each_possible_cpu(cpu) {
164 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
165 struct rt6_info *rt;
166
167 spin_lock_bh(&ul->lock);
168 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
169 struct inet6_dev *rt_idev = rt->rt6i_idev;
170 struct net_device *rt_dev = rt->dst.dev;
171
e332bc67 172 if (rt_idev->dev == dev) {
8d0b94af
MKL
173 rt->rt6i_idev = in6_dev_get(loopback_dev);
174 in6_dev_put(rt_idev);
175 }
176
e332bc67 177 if (rt_dev == dev) {
8d0b94af
MKL
178 rt->dst.dev = loopback_dev;
179 dev_hold(rt->dst.dev);
180 dev_put(rt_dev);
181 }
182 }
183 spin_unlock_bh(&ul->lock);
184 }
185}
186
d52d3997
MKL
187static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
188{
189 return dst_metrics_write_ptr(rt->dst.from);
190}
191
06582540
DM
192static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
193{
4b32b5ad 194 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 195
d52d3997
MKL
196 if (rt->rt6i_flags & RTF_PCPU)
197 return rt6_pcpu_cow_metrics(rt);
198 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
199 return NULL;
200 else
3b471175 201 return dst_cow_metrics_generic(dst, old);
06582540
DM
202}
203
f894cbf8
DM
204static inline const void *choose_neigh_daddr(struct rt6_info *rt,
205 struct sk_buff *skb,
206 const void *daddr)
39232973
DM
207{
208 struct in6_addr *p = &rt->rt6i_gateway;
209
a7563f34 210 if (!ipv6_addr_any(p))
39232973 211 return (const void *) p;
f894cbf8
DM
212 else if (skb)
213 return &ipv6_hdr(skb)->daddr;
39232973
DM
214 return daddr;
215}
216
f894cbf8
DM
217static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
218 struct sk_buff *skb,
219 const void *daddr)
d3aaeb38 220{
39232973
DM
221 struct rt6_info *rt = (struct rt6_info *) dst;
222 struct neighbour *n;
223
f894cbf8 224 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 225 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
226 if (n)
227 return n;
228 return neigh_create(&nd_tbl, daddr, dst->dev);
229}
230
63fca65d
JA
231static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
232{
233 struct net_device *dev = dst->dev;
234 struct rt6_info *rt = (struct rt6_info *)dst;
235
236 daddr = choose_neigh_daddr(rt, NULL, daddr);
237 if (!daddr)
238 return;
239 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
240 return;
241 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
242 return;
243 __ipv6_confirm_neigh(dev, daddr);
244}
245
9a7ec3a9 246static struct dst_ops ip6_dst_ops_template = {
1da177e4 247 .family = AF_INET6,
1da177e4
LT
248 .gc = ip6_dst_gc,
249 .gc_thresh = 1024,
250 .check = ip6_dst_check,
0dbaee3b 251 .default_advmss = ip6_default_advmss,
ebb762f2 252 .mtu = ip6_mtu,
06582540 253 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
254 .destroy = ip6_dst_destroy,
255 .ifdown = ip6_dst_ifdown,
256 .negative_advice = ip6_negative_advice,
257 .link_failure = ip6_link_failure,
258 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 259 .redirect = rt6_do_redirect,
9f8955cc 260 .local_out = __ip6_local_out,
d3aaeb38 261 .neigh_lookup = ip6_neigh_lookup,
63fca65d 262 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
263};
264
ebb762f2 265static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 266{
618f9bc7
SK
267 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
268
269 return mtu ? : dst->dev->mtu;
ec831ea7
RD
270}
271
6700c270
DM
272static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
273 struct sk_buff *skb, u32 mtu)
14e50e57
DM
274{
275}
276
6700c270
DM
/* dst_ops->redirect handler for blackhole dsts: intentionally a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
281
14e50e57
DM
282static struct dst_ops ip6_dst_blackhole_ops = {
283 .family = AF_INET6,
14e50e57
DM
284 .destroy = ip6_dst_destroy,
285 .check = ip6_dst_check,
ebb762f2 286 .mtu = ip6_blackhole_mtu,
214f45c9 287 .default_advmss = ip6_default_advmss,
14e50e57 288 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 289 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 290 .cow_metrics = dst_cow_metrics_generic,
d3aaeb38 291 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
292};
293
62fa8a84 294static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 295 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
296};
297
fb0af4c7 298static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
299 .dst = {
300 .__refcnt = ATOMIC_INIT(1),
301 .__use = 1,
2c20cbd7 302 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 303 .error = -ENETUNREACH,
d8d1f30b
CG
304 .input = ip6_pkt_discard,
305 .output = ip6_pkt_discard_out,
1da177e4
LT
306 },
307 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 308 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
309 .rt6i_metric = ~(u32) 0,
310 .rt6i_ref = ATOMIC_INIT(1),
311};
312
101367c2
TG
313#ifdef CONFIG_IPV6_MULTIPLE_TABLES
314
fb0af4c7 315static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
316 .dst = {
317 .__refcnt = ATOMIC_INIT(1),
318 .__use = 1,
2c20cbd7 319 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 320 .error = -EACCES,
d8d1f30b
CG
321 .input = ip6_pkt_prohibit,
322 .output = ip6_pkt_prohibit_out,
101367c2
TG
323 },
324 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 325 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
326 .rt6i_metric = ~(u32) 0,
327 .rt6i_ref = ATOMIC_INIT(1),
328};
329
fb0af4c7 330static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
331 .dst = {
332 .__refcnt = ATOMIC_INIT(1),
333 .__use = 1,
2c20cbd7 334 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 335 .error = -EINVAL,
d8d1f30b 336 .input = dst_discard,
ede2059d 337 .output = dst_discard_out,
101367c2
TG
338 },
339 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 340 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
341 .rt6i_metric = ~(u32) 0,
342 .rt6i_ref = ATOMIC_INIT(1),
343};
344
345#endif
346
ebfa45f0
MKL
347static void rt6_info_init(struct rt6_info *rt)
348{
349 struct dst_entry *dst = &rt->dst;
350
351 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
352 INIT_LIST_HEAD(&rt->rt6i_siblings);
353 INIT_LIST_HEAD(&rt->rt6i_uncached);
354}
355
1da177e4 356/* allocate dst with ip6_dst_ops */
d52d3997
MKL
357static struct rt6_info *__ip6_dst_alloc(struct net *net,
358 struct net_device *dev,
ad706862 359 int flags)
1da177e4 360{
97bab73f 361 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 362 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 363
81eb8447 364 if (rt) {
ebfa45f0 365 rt6_info_init(rt);
81eb8447
WW
366 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
367 }
8104891b 368
cf911662 369 return rt;
1da177e4
LT
370}
371
9ab179d8
DA
372struct rt6_info *ip6_dst_alloc(struct net *net,
373 struct net_device *dev,
374 int flags)
d52d3997 375{
ad706862 376 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
377
378 if (rt) {
379 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
bfd8e5a4 380 if (!rt->rt6i_pcpu) {
587fea74 381 dst_release_immediate(&rt->dst);
d52d3997
MKL
382 return NULL;
383 }
384 }
385
386 return rt;
387}
9ab179d8 388EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 389
1da177e4
LT
390static void ip6_dst_destroy(struct dst_entry *dst)
391{
392 struct rt6_info *rt = (struct rt6_info *)dst;
35732d01 393 struct rt6_exception_bucket *bucket;
ecd98837 394 struct dst_entry *from = dst->from;
8d0b94af 395 struct inet6_dev *idev;
1da177e4 396
4b32b5ad 397 dst_destroy_metrics_generic(dst);
87775312 398 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
399 rt6_uncached_list_del(rt);
400
401 idev = rt->rt6i_idev;
38308473 402 if (idev) {
1da177e4
LT
403 rt->rt6i_idev = NULL;
404 in6_dev_put(idev);
1ab1457c 405 }
35732d01
WW
406 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
407 if (bucket) {
408 rt->rt6i_exception_bucket = NULL;
409 kfree(bucket);
410 }
1716a961 411
ecd98837
YH
412 dst->from = NULL;
413 dst_release(from);
b3419363
DM
414}
415
1da177e4
LT
416static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
417 int how)
418{
419 struct rt6_info *rt = (struct rt6_info *)dst;
420 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 421 struct net_device *loopback_dev =
c346dca1 422 dev_net(dev)->loopback_dev;
1da177e4 423
e5645f51
WW
424 if (idev && idev->dev != loopback_dev) {
425 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
426 if (loopback_idev) {
427 rt->rt6i_idev = loopback_idev;
428 in6_dev_put(idev);
97cac082 429 }
1da177e4
LT
430 }
431}
432
5973fb1e
MKL
433static bool __rt6_check_expired(const struct rt6_info *rt)
434{
435 if (rt->rt6i_flags & RTF_EXPIRES)
436 return time_after(jiffies, rt->dst.expires);
437 else
438 return false;
439}
440
a50feda5 441static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 442{
1716a961
G
443 if (rt->rt6i_flags & RTF_EXPIRES) {
444 if (time_after(jiffies, rt->dst.expires))
a50feda5 445 return true;
1716a961 446 } else if (rt->dst.from) {
1e2ea8ad
XL
447 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
448 rt6_check_expired((struct rt6_info *)rt->dst.from);
1716a961 449 }
a50feda5 450 return false;
1da177e4
LT
451}
452
51ebd318 453static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
52bd4c0c
ND
454 struct flowi6 *fl6, int oif,
455 int strict)
51ebd318
ND
456{
457 struct rt6_info *sibling, *next_sibling;
458 int route_choosen;
459
b673d6cc
JS
460 /* We might have already computed the hash for ICMPv6 errors. In such
461 * case it will always be non-zero. Otherwise now is the time to do it.
462 */
463 if (!fl6->mp_hash)
464 fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
465
466 route_choosen = fl6->mp_hash % (match->rt6i_nsiblings + 1);
51ebd318
ND
467 /* Don't change the route, if route_choosen == 0
468 * (siblings does not include ourself)
469 */
470 if (route_choosen)
471 list_for_each_entry_safe(sibling, next_sibling,
472 &match->rt6i_siblings, rt6i_siblings) {
473 route_choosen--;
474 if (route_choosen == 0) {
52bd4c0c
ND
475 if (rt6_score_route(sibling, oif, strict) < 0)
476 break;
51ebd318
ND
477 match = sibling;
478 break;
479 }
480 }
481 return match;
482}
483
1da177e4 484/*
66f5d6ce 485 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
486 */
487
8ed67789
DL
488static inline struct rt6_info *rt6_device_match(struct net *net,
489 struct rt6_info *rt,
b71d1d42 490 const struct in6_addr *saddr,
1da177e4 491 int oif,
d420895e 492 int flags)
1da177e4
LT
493{
494 struct rt6_info *local = NULL;
495 struct rt6_info *sprt;
496
dd3abc4e
YH
497 if (!oif && ipv6_addr_any(saddr))
498 goto out;
499
66f5d6ce 500 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->dst.rt6_next)) {
d1918542 501 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
502
503 if (oif) {
1da177e4
LT
504 if (dev->ifindex == oif)
505 return sprt;
506 if (dev->flags & IFF_LOOPBACK) {
38308473 507 if (!sprt->rt6i_idev ||
1da177e4 508 sprt->rt6i_idev->dev->ifindex != oif) {
17fb0b2b 509 if (flags & RT6_LOOKUP_F_IFACE)
1da177e4 510 continue;
17fb0b2b
DA
511 if (local &&
512 local->rt6i_idev->dev->ifindex == oif)
1da177e4
LT
513 continue;
514 }
515 local = sprt;
516 }
dd3abc4e
YH
517 } else {
518 if (ipv6_chk_addr(net, saddr, dev,
519 flags & RT6_LOOKUP_F_IFACE))
520 return sprt;
1da177e4 521 }
dd3abc4e 522 }
1da177e4 523
dd3abc4e 524 if (oif) {
1da177e4
LT
525 if (local)
526 return local;
527
d420895e 528 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 529 return net->ipv6.ip6_null_entry;
1da177e4 530 }
dd3abc4e 531out:
1da177e4
LT
532 return rt;
533}
534
27097255 535#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
536struct __rt6_probe_work {
537 struct work_struct work;
538 struct in6_addr target;
539 struct net_device *dev;
540};
541
542static void rt6_probe_deferred(struct work_struct *w)
543{
544 struct in6_addr mcaddr;
545 struct __rt6_probe_work *work =
546 container_of(w, struct __rt6_probe_work, work);
547
548 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 549 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 550 dev_put(work->dev);
662f5533 551 kfree(work);
c2f17e82
HFS
552}
553
27097255
YH
554static void rt6_probe(struct rt6_info *rt)
555{
990edb42 556 struct __rt6_probe_work *work;
f2c31e32 557 struct neighbour *neigh;
27097255
YH
558 /*
559 * Okay, this does not seem to be appropriate
560 * for now, however, we need to check if it
561 * is really so; aka Router Reachability Probing.
562 *
563 * Router Reachability Probe MUST be rate-limited
564 * to no more than one per minute.
565 */
2152caea 566 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 567 return;
2152caea
YH
568 rcu_read_lock_bh();
569 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
570 if (neigh) {
8d6c31bf
MKL
571 if (neigh->nud_state & NUD_VALID)
572 goto out;
573
990edb42 574 work = NULL;
2152caea 575 write_lock(&neigh->lock);
990edb42
MKL
576 if (!(neigh->nud_state & NUD_VALID) &&
577 time_after(jiffies,
578 neigh->updated +
579 rt->rt6i_idev->cnf.rtr_probe_interval)) {
580 work = kmalloc(sizeof(*work), GFP_ATOMIC);
581 if (work)
582 __neigh_set_probe_once(neigh);
c2f17e82 583 }
2152caea 584 write_unlock(&neigh->lock);
990edb42
MKL
585 } else {
586 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 587 }
990edb42
MKL
588
589 if (work) {
590 INIT_WORK(&work->work, rt6_probe_deferred);
591 work->target = rt->rt6i_gateway;
592 dev_hold(rt->dst.dev);
593 work->dev = rt->dst.dev;
594 schedule_work(&work->work);
595 }
596
8d6c31bf 597out:
2152caea 598 rcu_read_unlock_bh();
27097255
YH
599}
600#else
/* Stub used when router reachability probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* nothing to do */
}
604#endif
605
1da177e4 606/*
554cfb7e 607 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 608 */
b6f99a21 609static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 610{
d1918542 611 struct net_device *dev = rt->dst.dev;
161980f4 612 if (!oif || dev->ifindex == oif)
554cfb7e 613 return 2;
161980f4
DM
614 if ((dev->flags & IFF_LOOPBACK) &&
615 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
616 return 1;
617 return 0;
554cfb7e 618}
1da177e4 619
afc154e9 620static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 621{
f2c31e32 622 struct neighbour *neigh;
afc154e9 623 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 624
4d0c5911
YH
625 if (rt->rt6i_flags & RTF_NONEXTHOP ||
626 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 627 return RT6_NUD_SUCCEED;
145a3621
YH
628
629 rcu_read_lock_bh();
630 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
631 if (neigh) {
632 read_lock(&neigh->lock);
554cfb7e 633 if (neigh->nud_state & NUD_VALID)
afc154e9 634 ret = RT6_NUD_SUCCEED;
398bcbeb 635#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 636 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 637 ret = RT6_NUD_SUCCEED;
7e980569
JB
638 else
639 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 640#endif
145a3621 641 read_unlock(&neigh->lock);
afc154e9
HFS
642 } else {
643 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 644 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 645 }
145a3621
YH
646 rcu_read_unlock_bh();
647
a5a81f0b 648 return ret;
1da177e4
LT
649}
650
554cfb7e
YH
651static int rt6_score_route(struct rt6_info *rt, int oif,
652 int strict)
1da177e4 653{
a5a81f0b 654 int m;
1ab1457c 655
4d0c5911 656 m = rt6_check_dev(rt, oif);
77d16f45 657 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 658 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
659#ifdef CONFIG_IPV6_ROUTER_PREF
660 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
661#endif
afc154e9
HFS
662 if (strict & RT6_LOOKUP_F_REACHABLE) {
663 int n = rt6_check_neigh(rt);
664 if (n < 0)
665 return n;
666 }
554cfb7e
YH
667 return m;
668}
669
f11e6659 670static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
671 int *mpri, struct rt6_info *match,
672 bool *do_rr)
554cfb7e 673{
f11e6659 674 int m;
afc154e9 675 bool match_do_rr = false;
35103d11
AG
676 struct inet6_dev *idev = rt->rt6i_idev;
677 struct net_device *dev = rt->dst.dev;
678
679 if (dev && !netif_carrier_ok(dev) &&
d5d32e4b
DA
680 idev->cnf.ignore_routes_with_linkdown &&
681 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 682 goto out;
f11e6659
DM
683
684 if (rt6_check_expired(rt))
685 goto out;
686
687 m = rt6_score_route(rt, oif, strict);
7e980569 688 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
689 match_do_rr = true;
690 m = 0; /* lowest valid score */
7e980569 691 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 692 goto out;
afc154e9
HFS
693 }
694
695 if (strict & RT6_LOOKUP_F_REACHABLE)
696 rt6_probe(rt);
f11e6659 697
7e980569 698 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 699 if (m > *mpri) {
afc154e9 700 *do_rr = match_do_rr;
f11e6659
DM
701 *mpri = m;
702 match = rt;
f11e6659 703 }
f11e6659
DM
704out:
705 return match;
706}
707
708static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
8d1040e8 709 struct rt6_info *leaf,
f11e6659 710 struct rt6_info *rr_head,
afc154e9
HFS
711 u32 metric, int oif, int strict,
712 bool *do_rr)
f11e6659 713{
9fbdcfaf 714 struct rt6_info *rt, *match, *cont;
554cfb7e 715 int mpri = -1;
1da177e4 716
f11e6659 717 match = NULL;
9fbdcfaf 718 cont = NULL;
66f5d6ce 719 for (rt = rr_head; rt; rt = rcu_dereference(rt->dst.rt6_next)) {
9fbdcfaf
SK
720 if (rt->rt6i_metric != metric) {
721 cont = rt;
722 break;
723 }
724
725 match = find_match(rt, oif, strict, &mpri, match, do_rr);
726 }
727
66f5d6ce
WW
728 for (rt = leaf; rt && rt != rr_head;
729 rt = rcu_dereference(rt->dst.rt6_next)) {
9fbdcfaf
SK
730 if (rt->rt6i_metric != metric) {
731 cont = rt;
732 break;
733 }
734
afc154e9 735 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
736 }
737
738 if (match || !cont)
739 return match;
740
66f5d6ce 741 for (rt = cont; rt; rt = rcu_dereference(rt->dst.rt6_next))
afc154e9 742 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 743
f11e6659
DM
744 return match;
745}
1da177e4 746
8d1040e8
WW
747static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
748 int oif, int strict)
f11e6659 749{
66f5d6ce 750 struct rt6_info *leaf = rcu_dereference(fn->leaf);
f11e6659 751 struct rt6_info *match, *rt0;
afc154e9 752 bool do_rr = false;
17ecf590 753 int key_plen;
1da177e4 754
8d1040e8
WW
755 if (!leaf)
756 return net->ipv6.ip6_null_entry;
757
66f5d6ce 758 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 759 if (!rt0)
66f5d6ce 760 rt0 = leaf;
1da177e4 761
17ecf590
WW
762 /* Double check to make sure fn is not an intermediate node
763 * and fn->leaf does not points to its child's leaf
764 * (This might happen if all routes under fn are deleted from
765 * the tree and fib6_repair_tree() is called on the node.)
766 */
767 key_plen = rt0->rt6i_dst.plen;
768#ifdef CONFIG_IPV6_SUBTREES
769 if (rt0->rt6i_src.plen)
770 key_plen = rt0->rt6i_src.plen;
771#endif
772 if (fn->fn_bit != key_plen)
773 return net->ipv6.ip6_null_entry;
774
8d1040e8 775 match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
afc154e9 776 &do_rr);
1da177e4 777
afc154e9 778 if (do_rr) {
66f5d6ce 779 struct rt6_info *next = rcu_dereference(rt0->dst.rt6_next);
f11e6659 780
554cfb7e 781 /* no entries matched; do round-robin */
f11e6659 782 if (!next || next->rt6i_metric != rt0->rt6i_metric)
8d1040e8 783 next = leaf;
f11e6659 784
66f5d6ce
WW
785 if (next != rt0) {
786 spin_lock_bh(&leaf->rt6i_table->tb6_lock);
787 /* make sure next is not being deleted from the tree */
788 if (next->rt6i_node)
789 rcu_assign_pointer(fn->rr_ptr, next);
790 spin_unlock_bh(&leaf->rt6i_table->tb6_lock);
791 }
1da177e4 792 }
1da177e4 793
a02cec21 794 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
795}
796
8b9df265
MKL
797static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
798{
799 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
800}
801
70ceb4f5
YH
802#ifdef CONFIG_IPV6_ROUTE_INFO
803int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 804 const struct in6_addr *gwaddr)
70ceb4f5 805{
c346dca1 806 struct net *net = dev_net(dev);
70ceb4f5
YH
807 struct route_info *rinfo = (struct route_info *) opt;
808 struct in6_addr prefix_buf, *prefix;
809 unsigned int pref;
4bed72e4 810 unsigned long lifetime;
70ceb4f5
YH
811 struct rt6_info *rt;
812
813 if (len < sizeof(struct route_info)) {
814 return -EINVAL;
815 }
816
817 /* Sanity check for prefix_len and length */
818 if (rinfo->length > 3) {
819 return -EINVAL;
820 } else if (rinfo->prefix_len > 128) {
821 return -EINVAL;
822 } else if (rinfo->prefix_len > 64) {
823 if (rinfo->length < 2) {
824 return -EINVAL;
825 }
826 } else if (rinfo->prefix_len > 0) {
827 if (rinfo->length < 1) {
828 return -EINVAL;
829 }
830 }
831
832 pref = rinfo->route_pref;
833 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 834 return -EINVAL;
70ceb4f5 835
4bed72e4 836 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
837
838 if (rinfo->length == 3)
839 prefix = (struct in6_addr *)rinfo->prefix;
840 else {
841 /* this function is safe */
842 ipv6_addr_prefix(&prefix_buf,
843 (struct in6_addr *)rinfo->prefix,
844 rinfo->prefix_len);
845 prefix = &prefix_buf;
846 }
847
f104a567
DJ
848 if (rinfo->prefix_len == 0)
849 rt = rt6_get_dflt_router(gwaddr, dev);
850 else
851 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 852 gwaddr, dev);
70ceb4f5
YH
853
854 if (rt && !lifetime) {
e0a1ad73 855 ip6_del_rt(rt);
70ceb4f5
YH
856 rt = NULL;
857 }
858
859 if (!rt && lifetime)
830218c1
DA
860 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
861 dev, pref);
70ceb4f5
YH
862 else if (rt)
863 rt->rt6i_flags = RTF_ROUTEINFO |
864 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
865
866 if (rt) {
1716a961
G
867 if (!addrconf_finite_timeout(lifetime))
868 rt6_clean_expires(rt);
869 else
870 rt6_set_expires(rt, jiffies + HZ * lifetime);
871
94e187c0 872 ip6_rt_put(rt);
70ceb4f5
YH
873 }
874 return 0;
875}
876#endif
877
a3c00e46
MKL
878static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
879 struct in6_addr *saddr)
880{
66f5d6ce 881 struct fib6_node *pn, *sn;
a3c00e46
MKL
882 while (1) {
883 if (fn->fn_flags & RTN_TL_ROOT)
884 return NULL;
66f5d6ce
WW
885 pn = rcu_dereference(fn->parent);
886 sn = FIB6_SUBTREE(pn);
887 if (sn && sn != fn)
888 fn = fib6_lookup(sn, NULL, saddr);
a3c00e46
MKL
889 else
890 fn = pn;
891 if (fn->fn_flags & RTN_RTINFO)
892 return fn;
893 }
894}
c71099ac 895
d3843fe5
WW
896static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
897 bool null_fallback)
898{
899 struct rt6_info *rt = *prt;
900
901 if (dst_hold_safe(&rt->dst))
902 return true;
903 if (null_fallback) {
904 rt = net->ipv6.ip6_null_entry;
905 dst_hold(&rt->dst);
906 } else {
907 rt = NULL;
908 }
909 *prt = rt;
910 return false;
911}
912
8ed67789
DL
913static struct rt6_info *ip6_pol_route_lookup(struct net *net,
914 struct fib6_table *table,
4c9483b2 915 struct flowi6 *fl6, int flags)
1da177e4 916{
2b760fcf 917 struct rt6_info *rt, *rt_cache;
1da177e4 918 struct fib6_node *fn;
1da177e4 919
66f5d6ce 920 rcu_read_lock();
4c9483b2 921 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 922restart:
66f5d6ce
WW
923 rt = rcu_dereference(fn->leaf);
924 if (!rt) {
925 rt = net->ipv6.ip6_null_entry;
926 } else {
927 rt = rt6_device_match(net, rt, &fl6->saddr,
928 fl6->flowi6_oif, flags);
929 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
930 rt = rt6_multipath_select(rt, fl6,
931 fl6->flowi6_oif, flags);
932 }
a3c00e46
MKL
933 if (rt == net->ipv6.ip6_null_entry) {
934 fn = fib6_backtrack(fn, &fl6->saddr);
935 if (fn)
936 goto restart;
937 }
2b760fcf
WW
938 /* Search through exception table */
939 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
940 if (rt_cache)
941 rt = rt_cache;
942
d3843fe5
WW
943 if (ip6_hold_safe(net, &rt, true))
944 dst_use_noref(&rt->dst, jiffies);
945
66f5d6ce 946 rcu_read_unlock();
b811580d
DA
947
948 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
949
c71099ac
TG
950 return rt;
951
952}
953
67ba4152 954struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
ea6e574e
FW
955 int flags)
956{
957 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
958}
959EXPORT_SYMBOL_GPL(ip6_route_lookup);
960
9acd9f3a
YH
961struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
962 const struct in6_addr *saddr, int oif, int strict)
c71099ac 963{
4c9483b2
DM
964 struct flowi6 fl6 = {
965 .flowi6_oif = oif,
966 .daddr = *daddr,
c71099ac
TG
967 };
968 struct dst_entry *dst;
77d16f45 969 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 970
adaa70bb 971 if (saddr) {
4c9483b2 972 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
973 flags |= RT6_LOOKUP_F_HAS_SADDR;
974 }
975
4c9483b2 976 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
977 if (dst->error == 0)
978 return (struct rt6_info *) dst;
979
980 dst_release(dst);
981
1da177e4
LT
982 return NULL;
983}
7159039a
YH
984EXPORT_SYMBOL(rt6_lookup);
985
/* ip6_ins_rt is called with table->tb6_lock NOT held.
 * It takes a new route entry; if the addition fails for any reason,
 * the route is released.
 * The caller must hold a reference on the dst before calling it.
 */
991
e5fd387a 992static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
333c4301
DA
993 struct mx6_config *mxc,
994 struct netlink_ext_ack *extack)
1da177e4
LT
995{
996 int err;
c71099ac 997 struct fib6_table *table;
1da177e4 998
c71099ac 999 table = rt->rt6i_table;
66f5d6ce 1000 spin_lock_bh(&table->tb6_lock);
333c4301 1001 err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
66f5d6ce 1002 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1003
1004 return err;
1005}
1006
40e22e8f
TG
1007int ip6_ins_rt(struct rt6_info *rt)
1008{
e715b6d3
FW
1009 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
1010 struct mx6_config mxc = { .mx = NULL, };
1011
1cfb71ee
WW
1012 /* Hold dst to account for the reference from the fib6 tree */
1013 dst_hold(&rt->dst);
333c4301 1014 return __ip6_ins_rt(rt, &info, &mxc, NULL);
40e22e8f
TG
1015}
1016
4832c30d
DA
1017/* called with rcu_lock held */
1018static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
1019{
1020 struct net_device *dev = rt->dst.dev;
1021
1022 if (rt->rt6i_flags & RTF_LOCAL) {
1023 /* for copies of local routes, dst->dev needs to be the
1024 * device if it is a master device, the master device if
1025 * device is enslaved, and the loopback as the default
1026 */
1027 if (netif_is_l3_slave(dev) &&
1028 !rt6_need_strict(&rt->rt6i_dst.addr))
1029 dev = l3mdev_master_dev_rcu(dev);
1030 else if (!netif_is_l3_master(dev))
1031 dev = dev_net(dev)->loopback_dev;
1032 /* last case is netif_is_l3_master(dev) is true in which
1033 * case we want dev returned to be dev
1034 */
1035 }
1036
1037 return dev;
1038}
1039
8b9df265
MKL
1040static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
1041 const struct in6_addr *daddr,
1042 const struct in6_addr *saddr)
1da177e4 1043{
4832c30d 1044 struct net_device *dev;
1da177e4
LT
1045 struct rt6_info *rt;
1046
1047 /*
1048 * Clone the route.
1049 */
1050
d52d3997 1051 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
83a09abd 1052 ort = (struct rt6_info *)ort->dst.from;
1da177e4 1053
4832c30d
DA
1054 rcu_read_lock();
1055 dev = ip6_rt_get_dev_rcu(ort);
1056 rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
1057 rcu_read_unlock();
83a09abd
MKL
1058 if (!rt)
1059 return NULL;
1060
1061 ip6_rt_copy_init(rt, ort);
1062 rt->rt6i_flags |= RTF_CACHE;
1063 rt->rt6i_metric = 0;
1064 rt->dst.flags |= DST_HOST;
1065 rt->rt6i_dst.addr = *daddr;
1066 rt->rt6i_dst.plen = 128;
1da177e4 1067
83a09abd
MKL
1068 if (!rt6_is_gw_or_nonexthop(ort)) {
1069 if (ort->rt6i_dst.plen != 128 &&
1070 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
1071 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1072#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1073 if (rt->rt6i_src.plen && saddr) {
1074 rt->rt6i_src.addr = *saddr;
1075 rt->rt6i_src.plen = 128;
8b9df265 1076 }
83a09abd 1077#endif
95a9a5ba 1078 }
1da177e4 1079
95a9a5ba
YH
1080 return rt;
1081}
1da177e4 1082
d52d3997
MKL
1083static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1084{
4832c30d 1085 struct net_device *dev;
d52d3997
MKL
1086 struct rt6_info *pcpu_rt;
1087
4832c30d
DA
1088 rcu_read_lock();
1089 dev = ip6_rt_get_dev_rcu(rt);
1090 pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
1091 rcu_read_unlock();
d52d3997
MKL
1092 if (!pcpu_rt)
1093 return NULL;
1094 ip6_rt_copy_init(pcpu_rt, rt);
1095 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1096 pcpu_rt->rt6i_flags |= RTF_PCPU;
1097 return pcpu_rt;
1098}
1099
66f5d6ce 1100/* It should be called with rcu_read_lock() acquired */
d52d3997
MKL
1101static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1102{
a73e4195 1103 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1104
1105 p = this_cpu_ptr(rt->rt6i_pcpu);
1106 pcpu_rt = *p;
1107
d3843fe5 1108 if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false))
a73e4195 1109 rt6_dst_from_metrics_check(pcpu_rt);
d3843fe5 1110
a73e4195
MKL
1111 return pcpu_rt;
1112}
1113
1114static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1115{
1116 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1117
1118 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1119 if (!pcpu_rt) {
1120 struct net *net = dev_net(rt->dst.dev);
1121
9c7370a1
MKL
1122 dst_hold(&net->ipv6.ip6_null_entry->dst);
1123 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1124 }
1125
a94b9367
WW
1126 dst_hold(&pcpu_rt->dst);
1127 p = this_cpu_ptr(rt->rt6i_pcpu);
1128 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1129 BUG_ON(prev);
a94b9367 1130
d52d3997
MKL
1131 rt6_dst_from_metrics_check(pcpu_rt);
1132 return pcpu_rt;
1133}
1134
35732d01
WW
/* Exception hash table implementation.
 *
 * rt6_exception_lock is the spinlock taken (with BHs disabled) around
 * modifications of the per-route exception buckets.
 */
static DEFINE_SPINLOCK(rt6_exception_lock);
1138
1139/* Remove rt6_ex from hash table and free the memory
1140 * Caller must hold rt6_exception_lock
1141 */
1142static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1143 struct rt6_exception *rt6_ex)
1144{
b2427e67 1145 struct net *net;
81eb8447 1146
35732d01
WW
1147 if (!bucket || !rt6_ex)
1148 return;
b2427e67
CIK
1149
1150 net = dev_net(rt6_ex->rt6i->dst.dev);
35732d01
WW
1151 rt6_ex->rt6i->rt6i_node = NULL;
1152 hlist_del_rcu(&rt6_ex->hlist);
1153 rt6_release(rt6_ex->rt6i);
1154 kfree_rcu(rt6_ex, rcu);
1155 WARN_ON_ONCE(!bucket->depth);
1156 bucket->depth--;
81eb8447 1157 net->ipv6.rt6_stats->fib_rt_cache--;
35732d01
WW
1158}
1159
1160/* Remove oldest rt6_ex in bucket and free the memory
1161 * Caller must hold rt6_exception_lock
1162 */
1163static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1164{
1165 struct rt6_exception *rt6_ex, *oldest = NULL;
1166
1167 if (!bucket)
1168 return;
1169
1170 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1171 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1172 oldest = rt6_ex;
1173 }
1174 rt6_remove_exception(bucket, oldest);
1175}
1176
1177static u32 rt6_exception_hash(const struct in6_addr *dst,
1178 const struct in6_addr *src)
1179{
1180 static u32 seed __read_mostly;
1181 u32 val;
1182
1183 net_get_random_once(&seed, sizeof(seed));
1184 val = jhash(dst, sizeof(*dst), seed);
1185
1186#ifdef CONFIG_IPV6_SUBTREES
1187 if (src)
1188 val = jhash(src, sizeof(*src), val);
1189#endif
1190 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1191}
1192
1193/* Helper function to find the cached rt in the hash table
1194 * and update bucket pointer to point to the bucket for this
1195 * (daddr, saddr) pair
1196 * Caller must hold rt6_exception_lock
1197 */
1198static struct rt6_exception *
1199__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1200 const struct in6_addr *daddr,
1201 const struct in6_addr *saddr)
1202{
1203 struct rt6_exception *rt6_ex;
1204 u32 hval;
1205
1206 if (!(*bucket) || !daddr)
1207 return NULL;
1208
1209 hval = rt6_exception_hash(daddr, saddr);
1210 *bucket += hval;
1211
1212 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1213 struct rt6_info *rt6 = rt6_ex->rt6i;
1214 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1215
1216#ifdef CONFIG_IPV6_SUBTREES
1217 if (matched && saddr)
1218 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1219#endif
1220 if (matched)
1221 return rt6_ex;
1222 }
1223 return NULL;
1224}
1225
1226/* Helper function to find the cached rt in the hash table
1227 * and update bucket pointer to point to the bucket for this
1228 * (daddr, saddr) pair
1229 * Caller must hold rcu_read_lock()
1230 */
1231static struct rt6_exception *
1232__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1233 const struct in6_addr *daddr,
1234 const struct in6_addr *saddr)
1235{
1236 struct rt6_exception *rt6_ex;
1237 u32 hval;
1238
1239 WARN_ON_ONCE(!rcu_read_lock_held());
1240
1241 if (!(*bucket) || !daddr)
1242 return NULL;
1243
1244 hval = rt6_exception_hash(daddr, saddr);
1245 *bucket += hval;
1246
1247 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1248 struct rt6_info *rt6 = rt6_ex->rt6i;
1249 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1250
1251#ifdef CONFIG_IPV6_SUBTREES
1252 if (matched && saddr)
1253 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1254#endif
1255 if (matched)
1256 return rt6_ex;
1257 }
1258 return NULL;
1259}
1260
1261static int rt6_insert_exception(struct rt6_info *nrt,
1262 struct rt6_info *ort)
1263{
81eb8447 1264 struct net *net = dev_net(ort->dst.dev);
35732d01
WW
1265 struct rt6_exception_bucket *bucket;
1266 struct in6_addr *src_key = NULL;
1267 struct rt6_exception *rt6_ex;
1268 int err = 0;
1269
1270 /* ort can't be a cache or pcpu route */
1271 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
1272 ort = (struct rt6_info *)ort->dst.from;
1273 WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
1274
1275 spin_lock_bh(&rt6_exception_lock);
1276
1277 if (ort->exception_bucket_flushed) {
1278 err = -EINVAL;
1279 goto out;
1280 }
1281
1282 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1283 lockdep_is_held(&rt6_exception_lock));
1284 if (!bucket) {
1285 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1286 GFP_ATOMIC);
1287 if (!bucket) {
1288 err = -ENOMEM;
1289 goto out;
1290 }
1291 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1292 }
1293
1294#ifdef CONFIG_IPV6_SUBTREES
1295 /* rt6i_src.plen != 0 indicates ort is in subtree
1296 * and exception table is indexed by a hash of
1297 * both rt6i_dst and rt6i_src.
1298 * Otherwise, the exception table is indexed by
1299 * a hash of only rt6i_dst.
1300 */
1301 if (ort->rt6i_src.plen)
1302 src_key = &nrt->rt6i_src.addr;
1303#endif
60006a48
WW
1304
1305 /* Update rt6i_prefsrc as it could be changed
1306 * in rt6_remove_prefsrc()
1307 */
1308 nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
f5bbe7ee
WW
1309 /* rt6_mtu_change() might lower mtu on ort.
1310 * Only insert this exception route if its mtu
1311 * is less than ort's mtu value.
1312 */
1313 if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) {
1314 err = -EINVAL;
1315 goto out;
1316 }
60006a48 1317
35732d01
WW
1318 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1319 src_key);
1320 if (rt6_ex)
1321 rt6_remove_exception(bucket, rt6_ex);
1322
1323 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1324 if (!rt6_ex) {
1325 err = -ENOMEM;
1326 goto out;
1327 }
1328 rt6_ex->rt6i = nrt;
1329 rt6_ex->stamp = jiffies;
1330 atomic_inc(&nrt->rt6i_ref);
1331 nrt->rt6i_node = ort->rt6i_node;
1332 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1333 bucket->depth++;
81eb8447 1334 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1335
1336 if (bucket->depth > FIB6_MAX_DEPTH)
1337 rt6_exception_remove_oldest(bucket);
1338
1339out:
1340 spin_unlock_bh(&rt6_exception_lock);
1341
1342 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1343 if (!err) {
35732d01 1344 fib6_update_sernum(ort);
b886d5f2
PA
1345 fib6_force_start_gc(net);
1346 }
35732d01
WW
1347
1348 return err;
1349}
1350
1351void rt6_flush_exceptions(struct rt6_info *rt)
1352{
1353 struct rt6_exception_bucket *bucket;
1354 struct rt6_exception *rt6_ex;
1355 struct hlist_node *tmp;
1356 int i;
1357
1358 spin_lock_bh(&rt6_exception_lock);
1359 /* Prevent rt6_insert_exception() to recreate the bucket list */
1360 rt->exception_bucket_flushed = 1;
1361
1362 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1363 lockdep_is_held(&rt6_exception_lock));
1364 if (!bucket)
1365 goto out;
1366
1367 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1368 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1369 rt6_remove_exception(bucket, rt6_ex);
1370 WARN_ON_ONCE(bucket->depth);
1371 bucket++;
1372 }
1373
1374out:
1375 spin_unlock_bh(&rt6_exception_lock);
1376}
1377
1378/* Find cached rt in the hash table inside passed in rt
1379 * Caller has to hold rcu_read_lock()
1380 */
1381static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
1382 struct in6_addr *daddr,
1383 struct in6_addr *saddr)
1384{
1385 struct rt6_exception_bucket *bucket;
1386 struct in6_addr *src_key = NULL;
1387 struct rt6_exception *rt6_ex;
1388 struct rt6_info *res = NULL;
1389
1390 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1391
1392#ifdef CONFIG_IPV6_SUBTREES
1393 /* rt6i_src.plen != 0 indicates rt is in subtree
1394 * and exception table is indexed by a hash of
1395 * both rt6i_dst and rt6i_src.
1396 * Otherwise, the exception table is indexed by
1397 * a hash of only rt6i_dst.
1398 */
1399 if (rt->rt6i_src.plen)
1400 src_key = saddr;
1401#endif
1402 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1403
1404 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1405 res = rt6_ex->rt6i;
1406
1407 return res;
1408}
1409
1410/* Remove the passed in cached rt from the hash table that contains it */
1411int rt6_remove_exception_rt(struct rt6_info *rt)
1412{
1413 struct rt6_info *from = (struct rt6_info *)rt->dst.from;
1414 struct rt6_exception_bucket *bucket;
1415 struct in6_addr *src_key = NULL;
1416 struct rt6_exception *rt6_ex;
1417 int err;
1418
1419 if (!from ||
442d713b 1420 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1421 return -EINVAL;
1422
1423 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1424 return -ENOENT;
1425
1426 spin_lock_bh(&rt6_exception_lock);
1427 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1428 lockdep_is_held(&rt6_exception_lock));
1429#ifdef CONFIG_IPV6_SUBTREES
1430 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1431 * and exception table is indexed by a hash of
1432 * both rt6i_dst and rt6i_src.
1433 * Otherwise, the exception table is indexed by
1434 * a hash of only rt6i_dst.
1435 */
1436 if (from->rt6i_src.plen)
1437 src_key = &rt->rt6i_src.addr;
1438#endif
1439 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1440 &rt->rt6i_dst.addr,
1441 src_key);
1442 if (rt6_ex) {
1443 rt6_remove_exception(bucket, rt6_ex);
1444 err = 0;
1445 } else {
1446 err = -ENOENT;
1447 }
1448
1449 spin_unlock_bh(&rt6_exception_lock);
1450 return err;
1451}
1452
1453/* Find rt6_ex which contains the passed in rt cache and
1454 * refresh its stamp
1455 */
1456static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1457{
1458 struct rt6_info *from = (struct rt6_info *)rt->dst.from;
1459 struct rt6_exception_bucket *bucket;
1460 struct in6_addr *src_key = NULL;
1461 struct rt6_exception *rt6_ex;
1462
1463 if (!from ||
442d713b 1464 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1465 return;
1466
1467 rcu_read_lock();
1468 bucket = rcu_dereference(from->rt6i_exception_bucket);
1469
1470#ifdef CONFIG_IPV6_SUBTREES
1471 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1472 * and exception table is indexed by a hash of
1473 * both rt6i_dst and rt6i_src.
1474 * Otherwise, the exception table is indexed by
1475 * a hash of only rt6i_dst.
1476 */
1477 if (from->rt6i_src.plen)
1478 src_key = &rt->rt6i_src.addr;
1479#endif
1480 rt6_ex = __rt6_find_exception_rcu(&bucket,
1481 &rt->rt6i_dst.addr,
1482 src_key);
1483 if (rt6_ex)
1484 rt6_ex->stamp = jiffies;
1485
1486 rcu_read_unlock();
1487}
1488
60006a48
WW
1489static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
1490{
1491 struct rt6_exception_bucket *bucket;
1492 struct rt6_exception *rt6_ex;
1493 int i;
1494
1495 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1496 lockdep_is_held(&rt6_exception_lock));
1497
1498 if (bucket) {
1499 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1500 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1501 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1502 }
1503 bucket++;
1504 }
1505 }
1506}
1507
f5bbe7ee
WW
1508static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu)
1509{
1510 struct rt6_exception_bucket *bucket;
1511 struct rt6_exception *rt6_ex;
1512 int i;
1513
1514 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1515 lockdep_is_held(&rt6_exception_lock));
1516
1517 if (bucket) {
1518 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1519 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1520 struct rt6_info *entry = rt6_ex->rt6i;
1521 /* For RTF_CACHE with rt6i_pmtu == 0
1522 * (i.e. a redirected route),
1523 * the metrics of its rt->dst.from has already
1524 * been updated.
1525 */
1526 if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu)
1527 entry->rt6i_pmtu = mtu;
1528 }
1529 bucket++;
1530 }
1531 }
1532}
1533
b16cb459
WW
1534#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1535
1536static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
1537 struct in6_addr *gateway)
1538{
1539 struct rt6_exception_bucket *bucket;
1540 struct rt6_exception *rt6_ex;
1541 struct hlist_node *tmp;
1542 int i;
1543
1544 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1545 return;
1546
1547 spin_lock_bh(&rt6_exception_lock);
1548 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1549 lockdep_is_held(&rt6_exception_lock));
1550
1551 if (bucket) {
1552 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1553 hlist_for_each_entry_safe(rt6_ex, tmp,
1554 &bucket->chain, hlist) {
1555 struct rt6_info *entry = rt6_ex->rt6i;
1556
1557 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1558 RTF_CACHE_GATEWAY &&
1559 ipv6_addr_equal(gateway,
1560 &entry->rt6i_gateway)) {
1561 rt6_remove_exception(bucket, rt6_ex);
1562 }
1563 }
1564 bucket++;
1565 }
1566 }
1567
1568 spin_unlock_bh(&rt6_exception_lock);
1569}
1570
c757faa8
WW
1571static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1572 struct rt6_exception *rt6_ex,
1573 struct fib6_gc_args *gc_args,
1574 unsigned long now)
1575{
1576 struct rt6_info *rt = rt6_ex->rt6i;
1577
1859bac0
PA
1578 /* we are pruning and obsoleting aged-out and non gateway exceptions
1579 * even if others have still references to them, so that on next
1580 * dst_check() such references can be dropped.
1581 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1582 * expired, independently from their aging, as per RFC 8201 section 4
1583 */
1584 if (!(rt->rt6i_flags & RTF_EXPIRES) &&
c757faa8
WW
1585 time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1586 RT6_TRACE("aging clone %p\n", rt);
1587 rt6_remove_exception(bucket, rt6_ex);
1588 return;
1589 } else if (rt->rt6i_flags & RTF_GATEWAY) {
1590 struct neighbour *neigh;
1591 __u8 neigh_flags = 0;
1592
1593 neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
1594 if (neigh) {
1595 neigh_flags = neigh->flags;
1596 neigh_release(neigh);
1597 }
1598 if (!(neigh_flags & NTF_ROUTER)) {
1599 RT6_TRACE("purging route %p via non-router but gateway\n",
1600 rt);
1601 rt6_remove_exception(bucket, rt6_ex);
1602 return;
1603 }
1859bac0
PA
1604 } else if (__rt6_check_expired(rt)) {
1605 RT6_TRACE("purging expired route %p\n", rt);
1606 rt6_remove_exception(bucket, rt6_ex);
1607 return;
c757faa8
WW
1608 }
1609 gc_args->more++;
1610}
1611
1612void rt6_age_exceptions(struct rt6_info *rt,
1613 struct fib6_gc_args *gc_args,
1614 unsigned long now)
1615{
1616 struct rt6_exception_bucket *bucket;
1617 struct rt6_exception *rt6_ex;
1618 struct hlist_node *tmp;
1619 int i;
1620
1621 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1622 return;
1623
1624 spin_lock_bh(&rt6_exception_lock);
1625 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1626 lockdep_is_held(&rt6_exception_lock));
1627
1628 if (bucket) {
1629 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1630 hlist_for_each_entry_safe(rt6_ex, tmp,
1631 &bucket->chain, hlist) {
1632 rt6_age_examine_exception(bucket, rt6_ex,
1633 gc_args, now);
1634 }
1635 bucket++;
1636 }
1637 }
1638 spin_unlock_bh(&rt6_exception_lock);
1639}
1640
9ff74384
DA
1641struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1642 int oif, struct flowi6 *fl6, int flags)
1da177e4 1643{
367efcb9 1644 struct fib6_node *fn, *saved_fn;
2b760fcf 1645 struct rt6_info *rt, *rt_cache;
c71099ac 1646 int strict = 0;
1da177e4 1647
77d16f45 1648 strict |= flags & RT6_LOOKUP_F_IFACE;
d5d32e4b 1649 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
367efcb9
MKL
1650 if (net->ipv6.devconf_all->forwarding == 0)
1651 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1652
66f5d6ce 1653 rcu_read_lock();
1da177e4 1654
4c9483b2 1655 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1656 saved_fn = fn;
1da177e4 1657
ca254490
DA
1658 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1659 oif = 0;
1660
a3c00e46 1661redo_rt6_select:
8d1040e8 1662 rt = rt6_select(net, fn, oif, strict);
52bd4c0c 1663 if (rt->rt6i_nsiblings)
367efcb9 1664 rt = rt6_multipath_select(rt, fl6, oif, strict);
a3c00e46
MKL
1665 if (rt == net->ipv6.ip6_null_entry) {
1666 fn = fib6_backtrack(fn, &fl6->saddr);
1667 if (fn)
1668 goto redo_rt6_select;
367efcb9
MKL
1669 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1670 /* also consider unreachable route */
1671 strict &= ~RT6_LOOKUP_F_REACHABLE;
1672 fn = saved_fn;
1673 goto redo_rt6_select;
367efcb9 1674 }
a3c00e46
MKL
1675 }
1676
2b760fcf
WW
1677 /*Search through exception table */
1678 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
1679 if (rt_cache)
1680 rt = rt_cache;
fb9de91e 1681
d3843fe5 1682 if (rt == net->ipv6.ip6_null_entry) {
66f5d6ce 1683 rcu_read_unlock();
d3843fe5
WW
1684 dst_hold(&rt->dst);
1685 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1686 return rt;
1687 } else if (rt->rt6i_flags & RTF_CACHE) {
1688 if (ip6_hold_safe(net, &rt, true)) {
1689 dst_use_noref(&rt->dst, jiffies);
1690 rt6_dst_from_metrics_check(rt);
1691 }
66f5d6ce 1692 rcu_read_unlock();
b811580d 1693 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
d52d3997 1694 return rt;
3da59bd9
MKL
1695 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1696 !(rt->rt6i_flags & RTF_GATEWAY))) {
1697 /* Create a RTF_CACHE clone which will not be
1698 * owned by the fib6 tree. It is for the special case where
1699 * the daddr in the skb during the neighbor look-up is different
1700 * from the fl6->daddr used to look-up route here.
1701 */
1702
1703 struct rt6_info *uncached_rt;
1704
d3843fe5
WW
1705 if (ip6_hold_safe(net, &rt, true)) {
1706 dst_use_noref(&rt->dst, jiffies);
1707 } else {
66f5d6ce 1708 rcu_read_unlock();
d3843fe5
WW
1709 uncached_rt = rt;
1710 goto uncached_rt_out;
1711 }
66f5d6ce 1712 rcu_read_unlock();
d52d3997 1713
3da59bd9
MKL
1714 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1715 dst_release(&rt->dst);
c71099ac 1716
1cfb71ee
WW
1717 if (uncached_rt) {
1718 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1719 * No need for another dst_hold()
1720 */
8d0b94af 1721 rt6_uncached_list_add(uncached_rt);
81eb8447 1722 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1723 } else {
3da59bd9 1724 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1725 dst_hold(&uncached_rt->dst);
1726 }
b811580d 1727
d3843fe5 1728uncached_rt_out:
b811580d 1729 trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
3da59bd9 1730 return uncached_rt;
3da59bd9 1731
d52d3997
MKL
1732 } else {
1733 /* Get a percpu copy */
1734
1735 struct rt6_info *pcpu_rt;
1736
d3843fe5 1737 dst_use_noref(&rt->dst, jiffies);
951f788a 1738 local_bh_disable();
d52d3997 1739 pcpu_rt = rt6_get_pcpu_route(rt);
d52d3997 1740
951f788a 1741 if (!pcpu_rt) {
a94b9367
WW
1742 /* atomic_inc_not_zero() is needed when using rcu */
1743 if (atomic_inc_not_zero(&rt->rt6i_ref)) {
951f788a 1744 /* No dst_hold() on rt is needed because grabbing
a94b9367
WW
1745 * rt->rt6i_ref makes sure rt can't be released.
1746 */
a94b9367
WW
1747 pcpu_rt = rt6_make_pcpu_route(rt);
1748 rt6_release(rt);
1749 } else {
1750 /* rt is already removed from tree */
a94b9367
WW
1751 pcpu_rt = net->ipv6.ip6_null_entry;
1752 dst_hold(&pcpu_rt->dst);
1753 }
9c7370a1 1754 }
951f788a
ED
1755 local_bh_enable();
1756 rcu_read_unlock();
b811580d 1757 trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
d52d3997
MKL
1758 return pcpu_rt;
1759 }
1da177e4 1760}
9ff74384 1761EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1762
8ed67789 1763static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 1764 struct flowi6 *fl6, int flags)
4acad72d 1765{
4c9483b2 1766 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
1767}
1768
d409b847
MB
1769struct dst_entry *ip6_route_input_lookup(struct net *net,
1770 struct net_device *dev,
1771 struct flowi6 *fl6, int flags)
72331bc0
SL
1772{
1773 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1774 flags |= RT6_LOOKUP_F_IFACE;
1775
1776 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1777}
d409b847 1778EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1779
23aebdac
JS
1780static void ip6_multipath_l3_keys(const struct sk_buff *skb,
1781 struct flow_keys *keys)
1782{
1783 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1784 const struct ipv6hdr *key_iph = outer_iph;
1785 const struct ipv6hdr *inner_iph;
1786 const struct icmp6hdr *icmph;
1787 struct ipv6hdr _inner_iph;
1788
1789 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1790 goto out;
1791
1792 icmph = icmp6_hdr(skb);
1793 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1794 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1795 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1796 icmph->icmp6_type != ICMPV6_PARAMPROB)
1797 goto out;
1798
1799 inner_iph = skb_header_pointer(skb,
1800 skb_transport_offset(skb) + sizeof(*icmph),
1801 sizeof(_inner_iph), &_inner_iph);
1802 if (!inner_iph)
1803 goto out;
1804
1805 key_iph = inner_iph;
1806out:
1807 memset(keys, 0, sizeof(*keys));
1808 keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1809 keys->addrs.v6addrs.src = key_iph->saddr;
1810 keys->addrs.v6addrs.dst = key_iph->daddr;
1811 keys->tags.flow_label = ip6_flowinfo(key_iph);
1812 keys->basic.ip_proto = key_iph->nexthdr;
1813}
1814
1815/* if skb is set it will be used and fl6 can be NULL */
1816u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
1817{
1818 struct flow_keys hash_keys;
1819
1820 if (skb) {
1821 ip6_multipath_l3_keys(skb, &hash_keys);
1822 return flow_hash_from_keys(&hash_keys);
1823 }
1824
1825 return get_hash_from_flowi6(fl6);
1826}
1827
c71099ac
TG
1828void ip6_route_input(struct sk_buff *skb)
1829{
b71d1d42 1830 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1831 struct net *net = dev_net(skb->dev);
adaa70bb 1832 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 1833 struct ip_tunnel_info *tun_info;
4c9483b2 1834 struct flowi6 fl6 = {
e0d56fdd 1835 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
1836 .daddr = iph->daddr,
1837 .saddr = iph->saddr,
6502ca52 1838 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1839 .flowi6_mark = skb->mark,
1840 .flowi6_proto = iph->nexthdr,
c71099ac 1841 };
adaa70bb 1842
904af04d 1843 tun_info = skb_tunnel_info(skb);
46fa062a 1844 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 1845 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
23aebdac
JS
1846 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
1847 fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
06e9d040 1848 skb_dst_drop(skb);
72331bc0 1849 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1850}
1851
8ed67789 1852static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1853 struct flowi6 *fl6, int flags)
1da177e4 1854{
4c9483b2 1855 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1856}
1857
6f21c96a
PA
1858struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1859 struct flowi6 *fl6, int flags)
c71099ac 1860{
d46a9d67 1861 bool any_src;
c71099ac 1862
4c1feac5
DA
1863 if (rt6_need_strict(&fl6->daddr)) {
1864 struct dst_entry *dst;
1865
1866 dst = l3mdev_link_scope_lookup(net, fl6);
1867 if (dst)
1868 return dst;
1869 }
ca254490 1870
1fb9489b 1871 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1872
d46a9d67 1873 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 1874 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 1875 (fl6->flowi6_oif && any_src))
77d16f45 1876 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1877
d46a9d67 1878 if (!any_src)
adaa70bb 1879 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1880 else if (sk)
1881 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1882
4c9483b2 1883 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4 1884}
6f21c96a 1885EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 1886
2774c131 1887struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1888{
5c1e6aa3 1889 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 1890 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
1891 struct dst_entry *new = NULL;
1892
1dbe3252 1893 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 1894 DST_OBSOLETE_DEAD, 0);
14e50e57 1895 if (rt) {
0a1f5962 1896 rt6_info_init(rt);
81eb8447 1897 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 1898
0a1f5962 1899 new = &rt->dst;
14e50e57 1900 new->__use = 1;
352e512c 1901 new->input = dst_discard;
ede2059d 1902 new->output = dst_discard_out;
14e50e57 1903
0a1f5962 1904 dst_copy_metrics(new, &ort->dst);
14e50e57 1905
1dbe3252 1906 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 1907 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 1908 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
1909 rt->rt6i_metric = 0;
1910
1911 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1912#ifdef CONFIG_IPV6_SUBTREES
1913 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1914#endif
14e50e57
DM
1915 }
1916
69ead7af
DM
1917 dst_release(dst_orig);
1918 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1919}
14e50e57 1920
1da177e4
LT
1921/*
1922 * Destination cache support functions
1923 */
1924
4b32b5ad
MKL
1925static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1926{
1927 if (rt->dst.from &&
1928 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1929 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1930}
1931
3da59bd9
MKL
1932static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1933{
36143645 1934 u32 rt_cookie = 0;
c5cff856
WW
1935
1936 if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
3da59bd9
MKL
1937 return NULL;
1938
1939 if (rt6_check_expired(rt))
1940 return NULL;
1941
1942 return &rt->dst;
1943}
1944
1945static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1946{
5973fb1e
MKL
1947 if (!__rt6_check_expired(rt) &&
1948 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
3da59bd9
MKL
1949 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1950 return &rt->dst;
1951 else
1952 return NULL;
1953}
1954
1da177e4
LT
1955static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1956{
1957 struct rt6_info *rt;
1958
1959 rt = (struct rt6_info *) dst;
1960
6f3118b5
ND
1961 /* All IPV6 dsts are created with ->obsolete set to the value
1962 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1963 * into this function always.
1964 */
e3bc10bd 1965
4b32b5ad
MKL
1966 rt6_dst_from_metrics_check(rt);
1967
02bcf4e0 1968 if (rt->rt6i_flags & RTF_PCPU ||
a4c2fd7f 1969 (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
3da59bd9
MKL
1970 return rt6_dst_from_check(rt, cookie);
1971 else
1972 return rt6_check(rt, cookie);
1da177e4
LT
1973}
1974
1975static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1976{
1977 struct rt6_info *rt = (struct rt6_info *) dst;
1978
1979 if (rt) {
54c1a859
YH
1980 if (rt->rt6i_flags & RTF_CACHE) {
1981 if (rt6_check_expired(rt)) {
1982 ip6_del_rt(rt);
1983 dst = NULL;
1984 }
1985 } else {
1da177e4 1986 dst_release(dst);
54c1a859
YH
1987 dst = NULL;
1988 }
1da177e4 1989 }
54c1a859 1990 return dst;
1da177e4
LT
1991}
1992
1993static void ip6_link_failure(struct sk_buff *skb)
1994{
1995 struct rt6_info *rt;
1996
3ffe533c 1997 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1998
adf30907 1999 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2000 if (rt) {
1eb4f758 2001 if (rt->rt6i_flags & RTF_CACHE) {
ad65a2f0
WW
2002 if (dst_hold_safe(&rt->dst))
2003 ip6_del_rt(rt);
c5cff856
WW
2004 } else {
2005 struct fib6_node *fn;
2006
2007 rcu_read_lock();
2008 fn = rcu_dereference(rt->rt6i_node);
2009 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2010 fn->fn_sernum = -1;
2011 rcu_read_unlock();
1eb4f758 2012 }
1da177e4
LT
2013 }
2014}
2015
45e4fd26
MKL
2016static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2017{
2018 struct net *net = dev_net(rt->dst.dev);
2019
2020 rt->rt6i_flags |= RTF_MODIFIED;
2021 rt->rt6i_pmtu = mtu;
2022 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2023}
2024
0d3f6d29
MKL
2025static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2026{
2027 return !(rt->rt6i_flags & RTF_CACHE) &&
4e587ea7
WW
2028 (rt->rt6i_flags & RTF_PCPU ||
2029 rcu_access_pointer(rt->rt6i_node));
0d3f6d29
MKL
2030}
2031
45e4fd26
MKL
2032static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2033 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2034{
0dec879f 2035 const struct in6_addr *daddr, *saddr;
67ba4152 2036 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2037
45e4fd26
MKL
2038 if (rt6->rt6i_flags & RTF_LOCAL)
2039 return;
81aded24 2040
19bda36c
XL
2041 if (dst_metric_locked(dst, RTAX_MTU))
2042 return;
2043
0dec879f
JA
2044 if (iph) {
2045 daddr = &iph->daddr;
2046 saddr = &iph->saddr;
2047 } else if (sk) {
2048 daddr = &sk->sk_v6_daddr;
2049 saddr = &inet6_sk(sk)->saddr;
2050 } else {
2051 daddr = NULL;
2052 saddr = NULL;
2053 }
2054 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2055 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2056 if (mtu >= dst_mtu(dst))
2057 return;
9d289715 2058
0d3f6d29 2059 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2060 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2061 /* update rt6_ex->stamp for cache */
2062 if (rt6->rt6i_flags & RTF_CACHE)
2063 rt6_update_exception_stamp_rt(rt6);
0dec879f 2064 } else if (daddr) {
45e4fd26
MKL
2065 struct rt6_info *nrt6;
2066
45e4fd26
MKL
2067 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
2068 if (nrt6) {
2069 rt6_do_update_pmtu(nrt6, mtu);
2b760fcf
WW
2070 if (rt6_insert_exception(nrt6, rt6))
2071 dst_release_immediate(&nrt6->dst);
45e4fd26 2072 }
1da177e4
LT
2073 }
2074}
2075
45e4fd26
MKL
2076static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2077 struct sk_buff *skb, u32 mtu)
2078{
2079 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2080}
2081
42ae66c8 2082void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2083 int oif, u32 mark, kuid_t uid)
81aded24
DM
2084{
2085 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2086 struct dst_entry *dst;
2087 struct flowi6 fl6;
2088
2089 memset(&fl6, 0, sizeof(fl6));
2090 fl6.flowi6_oif = oif;
1b3c61dc 2091 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
2092 fl6.daddr = iph->daddr;
2093 fl6.saddr = iph->saddr;
6502ca52 2094 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2095 fl6.flowi6_uid = uid;
81aded24
DM
2096
2097 dst = ip6_route_output(net, NULL, &fl6);
2098 if (!dst->error)
45e4fd26 2099 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2100 dst_release(dst);
2101}
2102EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2103
2104void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2105{
33c162a9
MKL
2106 struct dst_entry *dst;
2107
81aded24 2108 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 2109 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2110
2111 dst = __sk_dst_get(sk);
2112 if (!dst || !dst->obsolete ||
2113 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2114 return;
2115
2116 bh_lock_sock(sk);
2117 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2118 ip6_datagram_dst_update(sk, false);
2119 bh_unlock_sock(sk);
81aded24
DM
2120}
2121EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2122
b55b76b2
DJ
2123/* Handle redirects */
2124struct ip6rd_flowi {
2125 struct flowi6 fl6;
2126 struct in6_addr gateway;
2127};
2128
2129static struct rt6_info *__ip6_route_redirect(struct net *net,
2130 struct fib6_table *table,
2131 struct flowi6 *fl6,
2132 int flags)
2133{
2134 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2b760fcf 2135 struct rt6_info *rt, *rt_cache;
b55b76b2
DJ
2136 struct fib6_node *fn;
2137
2138 /* Get the "current" route for this destination and
67c408cf 2139 * check if the redirect has come from the appropriate router.
b55b76b2
DJ
2140 *
2141 * RFC 4861 specifies that redirects should only be
2142 * accepted if they come from the nexthop to the target.
2143 * Due to the way the routes are chosen, this notion
2144 * is a bit fuzzy and one might need to check all possible
2145 * routes.
2146 */
2147
66f5d6ce 2148 rcu_read_lock();
b55b76b2
DJ
2149 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2150restart:
66f5d6ce 2151 for_each_fib6_node_rt_rcu(fn) {
b55b76b2
DJ
2152 if (rt6_check_expired(rt))
2153 continue;
2154 if (rt->dst.error)
2155 break;
2156 if (!(rt->rt6i_flags & RTF_GATEWAY))
2157 continue;
2158 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
2159 continue;
2b760fcf
WW
2160 /* rt_cache's gateway might be different from its 'parent'
2161 * in the case of an ip redirect.
2162 * So we keep searching in the exception table if the gateway
2163 * is different.
2164 */
2165 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) {
2166 rt_cache = rt6_find_cached_rt(rt,
2167 &fl6->daddr,
2168 &fl6->saddr);
2169 if (rt_cache &&
2170 ipv6_addr_equal(&rdfl->gateway,
2171 &rt_cache->rt6i_gateway)) {
2172 rt = rt_cache;
2173 break;
2174 }
b55b76b2 2175 continue;
2b760fcf 2176 }
b55b76b2
DJ
2177 break;
2178 }
2179
2180 if (!rt)
2181 rt = net->ipv6.ip6_null_entry;
2182 else if (rt->dst.error) {
2183 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2184 goto out;
2185 }
2186
2187 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
2188 fn = fib6_backtrack(fn, &fl6->saddr);
2189 if (fn)
2190 goto restart;
b55b76b2 2191 }
a3c00e46 2192
b0a1ba59 2193out:
d3843fe5 2194 ip6_hold_safe(net, &rt, true);
b55b76b2 2195
66f5d6ce 2196 rcu_read_unlock();
b55b76b2 2197
b811580d 2198 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
b55b76b2
DJ
2199 return rt;
2200};
2201
2202static struct dst_entry *ip6_route_redirect(struct net *net,
2203 const struct flowi6 *fl6,
2204 const struct in6_addr *gateway)
2205{
2206 int flags = RT6_LOOKUP_F_HAS_SADDR;
2207 struct ip6rd_flowi rdfl;
2208
2209 rdfl.fl6 = *fl6;
2210 rdfl.gateway = *gateway;
2211
2212 return fib6_rule_lookup(net, &rdfl.fl6,
2213 flags, __ip6_route_redirect);
2214}
2215
e2d118a1
LC
2216void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2217 kuid_t uid)
3a5ad2ee
DM
2218{
2219 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2220 struct dst_entry *dst;
2221 struct flowi6 fl6;
2222
2223 memset(&fl6, 0, sizeof(fl6));
e374c618 2224 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
2225 fl6.flowi6_oif = oif;
2226 fl6.flowi6_mark = mark;
3a5ad2ee
DM
2227 fl6.daddr = iph->daddr;
2228 fl6.saddr = iph->saddr;
6502ca52 2229 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2230 fl6.flowi6_uid = uid;
3a5ad2ee 2231
b55b76b2
DJ
2232 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
2233 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2234 dst_release(dst);
2235}
2236EXPORT_SYMBOL_GPL(ip6_redirect);
2237
c92a59ec
DJ
2238void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2239 u32 mark)
2240{
2241 const struct ipv6hdr *iph = ipv6_hdr(skb);
2242 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2243 struct dst_entry *dst;
2244 struct flowi6 fl6;
2245
2246 memset(&fl6, 0, sizeof(fl6));
e374c618 2247 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
2248 fl6.flowi6_oif = oif;
2249 fl6.flowi6_mark = mark;
c92a59ec
DJ
2250 fl6.daddr = msg->dest;
2251 fl6.saddr = iph->daddr;
e2d118a1 2252 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 2253
b55b76b2
DJ
2254 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
2255 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2256 dst_release(dst);
2257}
2258
3a5ad2ee
DM
2259void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2260{
e2d118a1
LC
2261 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2262 sk->sk_uid);
3a5ad2ee
DM
2263}
2264EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2265
0dbaee3b 2266static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2267{
0dbaee3b
DM
2268 struct net_device *dev = dst->dev;
2269 unsigned int mtu = dst_mtu(dst);
2270 struct net *net = dev_net(dev);
2271
1da177e4
LT
2272 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2273
5578689a
DL
2274 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2275 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2276
2277 /*
1ab1457c
YH
2278 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2279 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2280 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2281 * rely only on pmtu discovery"
2282 */
2283 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2284 mtu = IPV6_MAXPLEN;
2285 return mtu;
2286}
2287
ebb762f2 2288static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2289{
4b32b5ad
MKL
2290 const struct rt6_info *rt = (const struct rt6_info *)dst;
2291 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 2292 struct inet6_dev *idev;
618f9bc7 2293
4b32b5ad
MKL
2294 if (mtu)
2295 goto out;
2296
2297 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2298 if (mtu)
30f78d8e 2299 goto out;
618f9bc7
SK
2300
2301 mtu = IPV6_MIN_MTU;
d33e4553
DM
2302
2303 rcu_read_lock();
2304 idev = __in6_dev_get(dst->dev);
2305 if (idev)
2306 mtu = idev->cnf.mtu6;
2307 rcu_read_unlock();
2308
30f78d8e 2309out:
14972cbd
RP
2310 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2311
2312 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2313}
2314
3b00944c 2315struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2316 struct flowi6 *fl6)
1da177e4 2317{
87a11578 2318 struct dst_entry *dst;
1da177e4
LT
2319 struct rt6_info *rt;
2320 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2321 struct net *net = dev_net(dev);
1da177e4 2322
38308473 2323 if (unlikely(!idev))
122bdf67 2324 return ERR_PTR(-ENODEV);
1da177e4 2325
ad706862 2326 rt = ip6_dst_alloc(net, dev, 0);
38308473 2327 if (unlikely(!rt)) {
1da177e4 2328 in6_dev_put(idev);
87a11578 2329 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2330 goto out;
2331 }
2332
8e2ec639
YZ
2333 rt->dst.flags |= DST_HOST;
2334 rt->dst.output = ip6_output;
550bab42 2335 rt->rt6i_gateway = fl6->daddr;
87a11578 2336 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2337 rt->rt6i_dst.plen = 128;
2338 rt->rt6i_idev = idev;
14edd87d 2339 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2340
587fea74
WW
2341 /* Add this dst into uncached_list so that rt6_ifdown() can
2342 * do proper release of the net_device
2343 */
2344 rt6_uncached_list_add(rt);
81eb8447 2345 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2346
87a11578
DM
2347 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2348
1da177e4 2349out:
87a11578 2350 return dst;
1da177e4
LT
2351}
2352
569d3645 2353static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2354{
86393e52 2355 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2356 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2357 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2358 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2359 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2360 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2361 int entries;
7019b78e 2362
fc66f95c 2363 entries = dst_entries_get_fast(ops);
49a18d86 2364 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2365 entries <= rt_max_size)
1da177e4
LT
2366 goto out;
2367
6891a346 2368 net->ipv6.ip6_rt_gc_expire++;
14956643 2369 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2370 entries = dst_entries_get_slow(ops);
2371 if (entries < ops->gc_thresh)
7019b78e 2372 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2373out:
7019b78e 2374 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2375 return entries > rt_max_size;
1da177e4
LT
2376}
2377
e715b6d3
FW
2378static int ip6_convert_metrics(struct mx6_config *mxc,
2379 const struct fib6_config *cfg)
2380{
c3a8d947 2381 bool ecn_ca = false;
e715b6d3
FW
2382 struct nlattr *nla;
2383 int remaining;
2384 u32 *mp;
2385
63159f29 2386 if (!cfg->fc_mx)
e715b6d3
FW
2387 return 0;
2388
2389 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
2390 if (unlikely(!mp))
2391 return -ENOMEM;
2392
2393 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
2394 int type = nla_type(nla);
1bb14807 2395 u32 val;
e715b6d3 2396
1bb14807
DB
2397 if (!type)
2398 continue;
2399 if (unlikely(type > RTAX_MAX))
2400 goto err;
ea697639 2401
1bb14807
DB
2402 if (type == RTAX_CC_ALGO) {
2403 char tmp[TCP_CA_NAME_MAX];
e715b6d3 2404
1bb14807 2405 nla_strlcpy(tmp, nla, sizeof(tmp));
c3a8d947 2406 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1bb14807
DB
2407 if (val == TCP_CA_UNSPEC)
2408 goto err;
2409 } else {
2410 val = nla_get_u32(nla);
e715b6d3 2411 }
626abd59
PA
2412 if (type == RTAX_HOPLIMIT && val > 255)
2413 val = 255;
b8d3e416
DB
2414 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
2415 goto err;
1bb14807
DB
2416
2417 mp[type - 1] = val;
2418 __set_bit(type - 1, mxc->mx_valid);
e715b6d3
FW
2419 }
2420
c3a8d947
DB
2421 if (ecn_ca) {
2422 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
2423 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
2424 }
e715b6d3 2425
c3a8d947 2426 mxc->mx = mp;
e715b6d3
FW
2427 return 0;
2428 err:
2429 kfree(mp);
2430 return -EINVAL;
2431}
1da177e4 2432
8c14586f
DA
2433static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2434 struct fib6_config *cfg,
2435 const struct in6_addr *gw_addr)
2436{
2437 struct flowi6 fl6 = {
2438 .flowi6_oif = cfg->fc_ifindex,
2439 .daddr = *gw_addr,
2440 .saddr = cfg->fc_prefsrc,
2441 };
2442 struct fib6_table *table;
2443 struct rt6_info *rt;
d5d32e4b 2444 int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
8c14586f
DA
2445
2446 table = fib6_get_table(net, cfg->fc_table);
2447 if (!table)
2448 return NULL;
2449
2450 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2451 flags |= RT6_LOOKUP_F_HAS_SADDR;
2452
2453 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
2454
2455 /* if table lookup failed, fall back to full lookup */
2456 if (rt == net->ipv6.ip6_null_entry) {
2457 ip6_rt_put(rt);
2458 rt = NULL;
2459 }
2460
2461 return rt;
2462}
2463
333c4301
DA
2464static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
2465 struct netlink_ext_ack *extack)
1da177e4 2466{
5578689a 2467 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
2468 struct rt6_info *rt = NULL;
2469 struct net_device *dev = NULL;
2470 struct inet6_dev *idev = NULL;
c71099ac 2471 struct fib6_table *table;
1da177e4 2472 int addr_type;
8c5b83f0 2473 int err = -EINVAL;
1da177e4 2474
557c44be 2475 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
2476 if (cfg->fc_flags & RTF_PCPU) {
2477 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 2478 goto out;
d5d531cb 2479 }
557c44be 2480
d5d531cb
DA
2481 if (cfg->fc_dst_len > 128) {
2482 NL_SET_ERR_MSG(extack, "Invalid prefix length");
2483 goto out;
2484 }
2485 if (cfg->fc_src_len > 128) {
2486 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 2487 goto out;
d5d531cb 2488 }
1da177e4 2489#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
2490 if (cfg->fc_src_len) {
2491 NL_SET_ERR_MSG(extack,
2492 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 2493 goto out;
d5d531cb 2494 }
1da177e4 2495#endif
86872cb5 2496 if (cfg->fc_ifindex) {
1da177e4 2497 err = -ENODEV;
5578689a 2498 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
2499 if (!dev)
2500 goto out;
2501 idev = in6_dev_get(dev);
2502 if (!idev)
2503 goto out;
2504 }
2505
86872cb5
TG
2506 if (cfg->fc_metric == 0)
2507 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 2508
d71314b4 2509 err = -ENOBUFS;
38308473
DM
2510 if (cfg->fc_nlinfo.nlh &&
2511 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 2512 table = fib6_get_table(net, cfg->fc_table);
38308473 2513 if (!table) {
f3213831 2514 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
2515 table = fib6_new_table(net, cfg->fc_table);
2516 }
2517 } else {
2518 table = fib6_new_table(net, cfg->fc_table);
2519 }
38308473
DM
2520
2521 if (!table)
c71099ac 2522 goto out;
c71099ac 2523
ad706862
MKL
2524 rt = ip6_dst_alloc(net, NULL,
2525 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1da177e4 2526
38308473 2527 if (!rt) {
1da177e4
LT
2528 err = -ENOMEM;
2529 goto out;
2530 }
2531
1716a961
G
2532 if (cfg->fc_flags & RTF_EXPIRES)
2533 rt6_set_expires(rt, jiffies +
2534 clock_t_to_jiffies(cfg->fc_expires));
2535 else
2536 rt6_clean_expires(rt);
1da177e4 2537
86872cb5
TG
2538 if (cfg->fc_protocol == RTPROT_UNSPEC)
2539 cfg->fc_protocol = RTPROT_BOOT;
2540 rt->rt6i_protocol = cfg->fc_protocol;
2541
2542 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
2543
2544 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 2545 rt->dst.input = ip6_mc_input;
ab79ad14
2546 else if (cfg->fc_flags & RTF_LOCAL)
2547 rt->dst.input = ip6_input;
1da177e4 2548 else
d8d1f30b 2549 rt->dst.input = ip6_forward;
1da177e4 2550
d8d1f30b 2551 rt->dst.output = ip6_output;
1da177e4 2552
19e42e45
RP
2553 if (cfg->fc_encap) {
2554 struct lwtunnel_state *lwtstate;
2555
30357d7d 2556 err = lwtunnel_build_state(cfg->fc_encap_type,
127eb7cd 2557 cfg->fc_encap, AF_INET6, cfg,
9ae28727 2558 &lwtstate, extack);
19e42e45
RP
2559 if (err)
2560 goto out;
61adedf3
JB
2561 rt->dst.lwtstate = lwtstate_get(lwtstate);
2562 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
2563 rt->dst.lwtstate->orig_output = rt->dst.output;
2564 rt->dst.output = lwtunnel_output;
25368623 2565 }
61adedf3
JB
2566 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
2567 rt->dst.lwtstate->orig_input = rt->dst.input;
2568 rt->dst.input = lwtunnel_input;
25368623 2569 }
19e42e45
RP
2570 }
2571
86872cb5
TG
2572 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
2573 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 2574 if (rt->rt6i_dst.plen == 128)
e5fd387a 2575 rt->dst.flags |= DST_HOST;
e5fd387a 2576
1da177e4 2577#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
2578 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
2579 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
2580#endif
2581
86872cb5 2582 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
2583
2584 /* We cannot add true routes via loopback here,
2585 they would result in kernel looping; promote them to reject routes
2586 */
86872cb5 2587 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
2588 (dev && (dev->flags & IFF_LOOPBACK) &&
2589 !(addr_type & IPV6_ADDR_LOOPBACK) &&
2590 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 2591 /* hold loopback dev/idev if we haven't done so. */
5578689a 2592 if (dev != net->loopback_dev) {
1da177e4
LT
2593 if (dev) {
2594 dev_put(dev);
2595 in6_dev_put(idev);
2596 }
5578689a 2597 dev = net->loopback_dev;
1da177e4
LT
2598 dev_hold(dev);
2599 idev = in6_dev_get(dev);
2600 if (!idev) {
2601 err = -ENODEV;
2602 goto out;
2603 }
2604 }
1da177e4 2605 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
2606 switch (cfg->fc_type) {
2607 case RTN_BLACKHOLE:
2608 rt->dst.error = -EINVAL;
ede2059d 2609 rt->dst.output = dst_discard_out;
7150aede 2610 rt->dst.input = dst_discard;
ef2c7d7b
ND
2611 break;
2612 case RTN_PROHIBIT:
2613 rt->dst.error = -EACCES;
7150aede
K
2614 rt->dst.output = ip6_pkt_prohibit_out;
2615 rt->dst.input = ip6_pkt_prohibit;
ef2c7d7b 2616 break;
b4949ab2 2617 case RTN_THROW:
0315e382 2618 case RTN_UNREACHABLE:
ef2c7d7b 2619 default:
7150aede 2620 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
0315e382
NF
2621 : (cfg->fc_type == RTN_UNREACHABLE)
2622 ? -EHOSTUNREACH : -ENETUNREACH;
7150aede
K
2623 rt->dst.output = ip6_pkt_discard_out;
2624 rt->dst.input = ip6_pkt_discard;
ef2c7d7b
ND
2625 break;
2626 }
1da177e4
LT
2627 goto install_route;
2628 }
2629
86872cb5 2630 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 2631 const struct in6_addr *gw_addr;
1da177e4
LT
2632 int gwa_type;
2633
86872cb5 2634 gw_addr = &cfg->fc_gateway;
330567b7 2635 gwa_type = ipv6_addr_type(gw_addr);
48ed7b26
FW
2636
2637 /* if gw_addr is local we will fail to detect this in case
2638 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2639 * will return already-added prefix route via interface that
2640 * prefix route was assigned to, which might be non-loopback.
2641 */
2642 err = -EINVAL;
330567b7
FW
2643 if (ipv6_chk_addr_and_flags(net, gw_addr,
2644 gwa_type & IPV6_ADDR_LINKLOCAL ?
d5d531cb
DA
2645 dev : NULL, 0, 0)) {
2646 NL_SET_ERR_MSG(extack, "Invalid gateway address");
48ed7b26 2647 goto out;
d5d531cb 2648 }
4e3fd7a0 2649 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
2650
2651 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
8c14586f 2652 struct rt6_info *grt = NULL;
1da177e4
LT
2653
2654 /* IPv6 strictly inhibits using non-link-local
2655 addresses as nexthop address.
2656 Otherwise, router will not able to send redirects.
2657 It is very good, but in some (rare!) circumstances
2658 (SIT, PtP, NBMA NOARP links) it is handy to allow
2659 some exceptions. --ANK
96d5822c
EN
2660 We allow IPv4-mapped nexthops to support RFC4798-type
2661 addressing
1da177e4 2662 */
96d5822c 2663 if (!(gwa_type & (IPV6_ADDR_UNICAST |
d5d531cb
DA
2664 IPV6_ADDR_MAPPED))) {
2665 NL_SET_ERR_MSG(extack,
2666 "Invalid gateway address");
1da177e4 2667 goto out;
d5d531cb 2668 }
1da177e4 2669
a435a07f 2670 if (cfg->fc_table) {
8c14586f
DA
2671 grt = ip6_nh_lookup_table(net, cfg, gw_addr);
2672
a435a07f
VB
2673 if (grt) {
2674 if (grt->rt6i_flags & RTF_GATEWAY ||
2675 (dev && dev != grt->dst.dev)) {
2676 ip6_rt_put(grt);
2677 grt = NULL;
2678 }
2679 }
2680 }
2681
8c14586f
DA
2682 if (!grt)
2683 grt = rt6_lookup(net, gw_addr, NULL,
2684 cfg->fc_ifindex, 1);
1da177e4
LT
2685
2686 err = -EHOSTUNREACH;
38308473 2687 if (!grt)
1da177e4
LT
2688 goto out;
2689 if (dev) {
d1918542 2690 if (dev != grt->dst.dev) {
94e187c0 2691 ip6_rt_put(grt);
1da177e4
LT
2692 goto out;
2693 }
2694 } else {
d1918542 2695 dev = grt->dst.dev;
1da177e4
LT
2696 idev = grt->rt6i_idev;
2697 dev_hold(dev);
2698 in6_dev_hold(grt->rt6i_idev);
2699 }
38308473 2700 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 2701 err = 0;
94e187c0 2702 ip6_rt_put(grt);
1da177e4
LT
2703
2704 if (err)
2705 goto out;
2706 }
2707 err = -EINVAL;
d5d531cb
DA
2708 if (!dev) {
2709 NL_SET_ERR_MSG(extack, "Egress device not specified");
2710 goto out;
2711 } else if (dev->flags & IFF_LOOPBACK) {
2712 NL_SET_ERR_MSG(extack,
2713 "Egress device can not be loopback device for this route");
1da177e4 2714 goto out;
d5d531cb 2715 }
1da177e4
LT
2716 }
2717
2718 err = -ENODEV;
38308473 2719 if (!dev)
1da177e4
LT
2720 goto out;
2721
c3968a85
DW
2722 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2723 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 2724 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
2725 err = -EINVAL;
2726 goto out;
2727 }
4e3fd7a0 2728 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
2729 rt->rt6i_prefsrc.plen = 128;
2730 } else
2731 rt->rt6i_prefsrc.plen = 0;
2732
86872cb5 2733 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
2734
2735install_route:
d8d1f30b 2736 rt->dst.dev = dev;
1da177e4 2737 rt->rt6i_idev = idev;
c71099ac 2738 rt->rt6i_table = table;
63152fc0 2739
c346dca1 2740 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 2741
8c5b83f0 2742 return rt;
6b9ea5a6
RP
2743out:
2744 if (dev)
2745 dev_put(dev);
2746 if (idev)
2747 in6_dev_put(idev);
587fea74
WW
2748 if (rt)
2749 dst_release_immediate(&rt->dst);
6b9ea5a6 2750
8c5b83f0 2751 return ERR_PTR(err);
6b9ea5a6
RP
2752}
2753
333c4301
DA
2754int ip6_route_add(struct fib6_config *cfg,
2755 struct netlink_ext_ack *extack)
6b9ea5a6
RP
2756{
2757 struct mx6_config mxc = { .mx = NULL, };
8c5b83f0 2758 struct rt6_info *rt;
6b9ea5a6
RP
2759 int err;
2760
333c4301 2761 rt = ip6_route_info_create(cfg, extack);
8c5b83f0
RP
2762 if (IS_ERR(rt)) {
2763 err = PTR_ERR(rt);
2764 rt = NULL;
6b9ea5a6 2765 goto out;
8c5b83f0 2766 }
6b9ea5a6 2767
e715b6d3
FW
2768 err = ip6_convert_metrics(&mxc, cfg);
2769 if (err)
2770 goto out;
1da177e4 2771
333c4301 2772 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
e715b6d3
FW
2773
2774 kfree(mxc.mx);
6b9ea5a6 2775
e715b6d3 2776 return err;
1da177e4 2777out:
587fea74
WW
2778 if (rt)
2779 dst_release_immediate(&rt->dst);
6b9ea5a6 2780
1da177e4
LT
2781 return err;
2782}
2783
86872cb5 2784static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2785{
2786 int err;
c71099ac 2787 struct fib6_table *table;
d1918542 2788 struct net *net = dev_net(rt->dst.dev);
1da177e4 2789
a4c2fd7f 2790 if (rt == net->ipv6.ip6_null_entry) {
6825a26c
G
2791 err = -ENOENT;
2792 goto out;
2793 }
6c813a72 2794
c71099ac 2795 table = rt->rt6i_table;
66f5d6ce 2796 spin_lock_bh(&table->tb6_lock);
86872cb5 2797 err = fib6_del(rt, info);
66f5d6ce 2798 spin_unlock_bh(&table->tb6_lock);
1da177e4 2799
6825a26c 2800out:
94e187c0 2801 ip6_rt_put(rt);
1da177e4
LT
2802 return err;
2803}
2804
e0a1ad73
TG
2805int ip6_del_rt(struct rt6_info *rt)
2806{
4d1169c1 2807 struct nl_info info = {
d1918542 2808 .nl_net = dev_net(rt->dst.dev),
4d1169c1 2809 };
528c4ceb 2810 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
2811}
2812
0ae81335
DA
2813static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2814{
2815 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 2816 struct net *net = info->nl_net;
16a16cd3 2817 struct sk_buff *skb = NULL;
0ae81335 2818 struct fib6_table *table;
e3330039 2819 int err = -ENOENT;
0ae81335 2820
e3330039
WC
2821 if (rt == net->ipv6.ip6_null_entry)
2822 goto out_put;
0ae81335 2823 table = rt->rt6i_table;
66f5d6ce 2824 spin_lock_bh(&table->tb6_lock);
0ae81335
DA
2825
2826 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
2827 struct rt6_info *sibling, *next_sibling;
2828
16a16cd3
DA
2829 /* prefer to send a single notification with all hops */
2830 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
2831 if (skb) {
2832 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2833
e3330039 2834 if (rt6_fill_node(net, skb, rt,
16a16cd3
DA
2835 NULL, NULL, 0, RTM_DELROUTE,
2836 info->portid, seq, 0) < 0) {
2837 kfree_skb(skb);
2838 skb = NULL;
2839 } else
2840 info->skip_notify = 1;
2841 }
2842
0ae81335
DA
2843 list_for_each_entry_safe(sibling, next_sibling,
2844 &rt->rt6i_siblings,
2845 rt6i_siblings) {
2846 err = fib6_del(sibling, info);
2847 if (err)
e3330039 2848 goto out_unlock;
0ae81335
DA
2849 }
2850 }
2851
2852 err = fib6_del(rt, info);
e3330039 2853out_unlock:
66f5d6ce 2854 spin_unlock_bh(&table->tb6_lock);
e3330039 2855out_put:
0ae81335 2856 ip6_rt_put(rt);
16a16cd3
DA
2857
2858 if (skb) {
e3330039 2859 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
2860 info->nlh, gfp_any());
2861 }
0ae81335
DA
2862 return err;
2863}
2864
333c4301
DA
2865static int ip6_route_del(struct fib6_config *cfg,
2866 struct netlink_ext_ack *extack)
1da177e4 2867{
2b760fcf 2868 struct rt6_info *rt, *rt_cache;
c71099ac 2869 struct fib6_table *table;
1da177e4 2870 struct fib6_node *fn;
1da177e4
LT
2871 int err = -ESRCH;
2872
5578689a 2873 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
2874 if (!table) {
2875 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 2876 return err;
d5d531cb 2877 }
c71099ac 2878
66f5d6ce 2879 rcu_read_lock();
1da177e4 2880
c71099ac 2881 fn = fib6_locate(&table->tb6_root,
86872cb5 2882 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 2883 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 2884 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 2885
1da177e4 2886 if (fn) {
66f5d6ce 2887 for_each_fib6_node_rt_rcu(fn) {
2b760fcf
WW
2888 if (cfg->fc_flags & RTF_CACHE) {
2889 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
2890 &cfg->fc_src);
2891 if (!rt_cache)
2892 continue;
2893 rt = rt_cache;
2894 }
86872cb5 2895 if (cfg->fc_ifindex &&
d1918542
DM
2896 (!rt->dst.dev ||
2897 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 2898 continue;
86872cb5
TG
2899 if (cfg->fc_flags & RTF_GATEWAY &&
2900 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 2901 continue;
86872cb5 2902 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 2903 continue;
c2ed1880
M
2904 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
2905 continue;
d3843fe5
WW
2906 if (!dst_hold_safe(&rt->dst))
2907 break;
66f5d6ce 2908 rcu_read_unlock();
1da177e4 2909
0ae81335
DA
2910 /* if gateway was specified only delete the one hop */
2911 if (cfg->fc_flags & RTF_GATEWAY)
2912 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2913
2914 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
2915 }
2916 }
66f5d6ce 2917 rcu_read_unlock();
1da177e4
LT
2918
2919 return err;
2920}
2921
6700c270 2922static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 2923{
a6279458 2924 struct netevent_redirect netevent;
e8599ff4 2925 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
2926 struct ndisc_options ndopts;
2927 struct inet6_dev *in6_dev;
2928 struct neighbour *neigh;
71bcdba0 2929 struct rd_msg *msg;
6e157b6a
DM
2930 int optlen, on_link;
2931 u8 *lladdr;
e8599ff4 2932
29a3cad5 2933 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 2934 optlen -= sizeof(*msg);
e8599ff4
DM
2935
2936 if (optlen < 0) {
6e157b6a 2937 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
2938 return;
2939 }
2940
71bcdba0 2941 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 2942
71bcdba0 2943 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 2944 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
2945 return;
2946 }
2947
6e157b6a 2948 on_link = 0;
71bcdba0 2949 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 2950 on_link = 1;
71bcdba0 2951 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 2952 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 2953 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
2954 return;
2955 }
2956
2957 in6_dev = __in6_dev_get(skb->dev);
2958 if (!in6_dev)
2959 return;
2960 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2961 return;
2962
2963 /* RFC2461 8.1:
2964 * The IP source address of the Redirect MUST be the same as the current
2965 * first-hop router for the specified ICMP Destination Address.
2966 */
2967
f997c55c 2968 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
2969 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2970 return;
2971 }
6e157b6a
DM
2972
2973 lladdr = NULL;
e8599ff4
DM
2974 if (ndopts.nd_opts_tgt_lladdr) {
2975 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2976 skb->dev);
2977 if (!lladdr) {
2978 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2979 return;
2980 }
2981 }
2982
6e157b6a 2983 rt = (struct rt6_info *) dst;
ec13ad1d 2984 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 2985 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 2986 return;
6e157b6a 2987 }
e8599ff4 2988
6e157b6a
DM
2989 /* Redirect received -> path was valid.
2990 * Look, redirects are sent only in response to data packets,
2991 * so that this nexthop apparently is reachable. --ANK
2992 */
0dec879f 2993 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 2994
71bcdba0 2995 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
2996 if (!neigh)
2997 return;
a6279458 2998
1da177e4
LT
2999 /*
3000 * We have finally decided to accept it.
3001 */
3002
f997c55c 3003 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3004 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3005 NEIGH_UPDATE_F_OVERRIDE|
3006 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3007 NEIGH_UPDATE_F_ISROUTER)),
3008 NDISC_REDIRECT, &ndopts);
1da177e4 3009
83a09abd 3010 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 3011 if (!nrt)
1da177e4
LT
3012 goto out;
3013
3014 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3015 if (on_link)
3016 nrt->rt6i_flags &= ~RTF_GATEWAY;
3017
b91d5329 3018 nrt->rt6i_protocol = RTPROT_REDIRECT;
4e3fd7a0 3019 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3020
2b760fcf
WW
3021 /* No need to remove rt from the exception table if rt is
3022 * a cached route because rt6_insert_exception() will
3023 * take care of it
3024 */
3025 if (rt6_insert_exception(nrt, rt)) {
3026 dst_release_immediate(&nrt->dst);
3027 goto out;
3028 }
1da177e4 3029
d8d1f30b
CG
3030 netevent.old = &rt->dst;
3031 netevent.new = &nrt->dst;
71bcdba0 3032 netevent.daddr = &msg->dest;
60592833 3033 netevent.neigh = neigh;
8d71740c
TT
3034 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3035
1da177e4 3036out:
e8599ff4 3037 neigh_release(neigh);
6e157b6a
DM
3038}
3039
1da177e4
LT
3040/*
3041 * Misc support functions
3042 */
3043
4b32b5ad
MKL
3044static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
3045{
3046 BUG_ON(from->dst.from);
3047
3048 rt->rt6i_flags &= ~RTF_EXPIRES;
3049 dst_hold(&from->dst);
3050 rt->dst.from = &from->dst;
3051 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
3052}
3053
83a09abd
MKL
3054static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
3055{
3056 rt->dst.input = ort->dst.input;
3057 rt->dst.output = ort->dst.output;
3058 rt->rt6i_dst = ort->rt6i_dst;
3059 rt->dst.error = ort->dst.error;
3060 rt->rt6i_idev = ort->rt6i_idev;
3061 if (rt->rt6i_idev)
3062 in6_dev_hold(rt->rt6i_idev);
3063 rt->dst.lastuse = jiffies;
3064 rt->rt6i_gateway = ort->rt6i_gateway;
3065 rt->rt6i_flags = ort->rt6i_flags;
3066 rt6_set_from(rt, ort);
3067 rt->rt6i_metric = ort->rt6i_metric;
1da177e4 3068#ifdef CONFIG_IPV6_SUBTREES
83a09abd 3069 rt->rt6i_src = ort->rt6i_src;
1da177e4 3070#endif
83a09abd
MKL
3071 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
3072 rt->rt6i_table = ort->rt6i_table;
61adedf3 3073 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
1da177e4
LT
3074}
3075
70ceb4f5 3076#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 3077static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 3078 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3079 const struct in6_addr *gwaddr,
3080 struct net_device *dev)
70ceb4f5 3081{
830218c1
DA
3082 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3083 int ifindex = dev->ifindex;
70ceb4f5
YH
3084 struct fib6_node *fn;
3085 struct rt6_info *rt = NULL;
c71099ac
TG
3086 struct fib6_table *table;
3087
830218c1 3088 table = fib6_get_table(net, tb_id);
38308473 3089 if (!table)
c71099ac 3090 return NULL;
70ceb4f5 3091
66f5d6ce 3092 rcu_read_lock();
38fbeeee 3093 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3094 if (!fn)
3095 goto out;
3096
66f5d6ce 3097 for_each_fib6_node_rt_rcu(fn) {
d1918542 3098 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
3099 continue;
3100 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
3101 continue;
3102 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
3103 continue;
d3843fe5 3104 ip6_hold_safe(NULL, &rt, false);
70ceb4f5
YH
3105 break;
3106 }
3107out:
66f5d6ce 3108 rcu_read_unlock();
70ceb4f5
YH
3109 return rt;
3110}
3111
efa2cea0 3112static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 3113 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3114 const struct in6_addr *gwaddr,
3115 struct net_device *dev,
95c96174 3116 unsigned int pref)
70ceb4f5 3117{
86872cb5 3118 struct fib6_config cfg = {
238fc7ea 3119 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3120 .fc_ifindex = dev->ifindex,
86872cb5
TG
3121 .fc_dst_len = prefixlen,
3122 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3123 RTF_UP | RTF_PREF(pref),
b91d5329 3124 .fc_protocol = RTPROT_RA,
15e47304 3125 .fc_nlinfo.portid = 0,
efa2cea0
DL
3126 .fc_nlinfo.nlh = NULL,
3127 .fc_nlinfo.nl_net = net,
86872cb5
TG
3128 };
3129
830218c1 3130 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3131 cfg.fc_dst = *prefix;
3132 cfg.fc_gateway = *gwaddr;
70ceb4f5 3133
e317da96
YH
3134 /* We should treat it as a default route if prefix length is 0. */
3135 if (!prefixlen)
86872cb5 3136 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3137
333c4301 3138 ip6_route_add(&cfg, NULL);
70ceb4f5 3139
830218c1 3140 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3141}
3142#endif
3143
b71d1d42 3144struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 3145{
830218c1 3146 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
1da177e4 3147 struct rt6_info *rt;
c71099ac 3148 struct fib6_table *table;
1da177e4 3149
830218c1 3150 table = fib6_get_table(dev_net(dev), tb_id);
38308473 3151 if (!table)
c71099ac 3152 return NULL;
1da177e4 3153
66f5d6ce
WW
3154 rcu_read_lock();
3155 for_each_fib6_node_rt_rcu(&table->tb6_root) {
d1918542 3156 if (dev == rt->dst.dev &&
045927ff 3157 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
3158 ipv6_addr_equal(&rt->rt6i_gateway, addr))
3159 break;
3160 }
3161 if (rt)
d3843fe5 3162 ip6_hold_safe(NULL, &rt, false);
66f5d6ce 3163 rcu_read_unlock();
1da177e4
LT
3164 return rt;
3165}
3166
b71d1d42 3167struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
3168 struct net_device *dev,
3169 unsigned int pref)
1da177e4 3170{
86872cb5 3171 struct fib6_config cfg = {
ca254490 3172 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3173 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3174 .fc_ifindex = dev->ifindex,
3175 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3176 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3177 .fc_protocol = RTPROT_RA,
15e47304 3178 .fc_nlinfo.portid = 0,
5578689a 3179 .fc_nlinfo.nlh = NULL,
c346dca1 3180 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 3181 };
1da177e4 3182
4e3fd7a0 3183 cfg.fc_gateway = *gwaddr;
1da177e4 3184
333c4301 3185 if (!ip6_route_add(&cfg, NULL)) {
830218c1
DA
3186 struct fib6_table *table;
3187
3188 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3189 if (table)
3190 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3191 }
1da177e4 3192
1da177e4
LT
3193 return rt6_get_dflt_router(gwaddr, dev);
3194}
3195
830218c1 3196static void __rt6_purge_dflt_routers(struct fib6_table *table)
1da177e4
LT
3197{
3198 struct rt6_info *rt;
3199
3200restart:
66f5d6ce
WW
3201 rcu_read_lock();
3202 for_each_fib6_node_rt_rcu(&table->tb6_root) {
3e8b0ac3
LC
3203 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3204 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d3843fe5 3205 if (dst_hold_safe(&rt->dst)) {
66f5d6ce 3206 rcu_read_unlock();
d3843fe5
WW
3207 ip6_del_rt(rt);
3208 } else {
66f5d6ce 3209 rcu_read_unlock();
d3843fe5 3210 }
1da177e4
LT
3211 goto restart;
3212 }
3213 }
66f5d6ce 3214 rcu_read_unlock();
830218c1
DA
3215
3216 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3217}
3218
3219void rt6_purge_dflt_routers(struct net *net)
3220{
3221 struct fib6_table *table;
3222 struct hlist_head *head;
3223 unsigned int h;
3224
3225 rcu_read_lock();
3226
3227 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3228 head = &net->ipv6.fib_table_hash[h];
3229 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3230 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3231 __rt6_purge_dflt_routers(table);
3232 }
3233 }
3234
3235 rcu_read_unlock();
1da177e4
LT
3236}
3237
5578689a
DL
3238static void rtmsg_to_fib6_config(struct net *net,
3239 struct in6_rtmsg *rtmsg,
86872cb5
TG
3240 struct fib6_config *cfg)
3241{
3242 memset(cfg, 0, sizeof(*cfg));
3243
ca254490
DA
3244 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3245 : RT6_TABLE_MAIN;
86872cb5
TG
3246 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3247 cfg->fc_metric = rtmsg->rtmsg_metric;
3248 cfg->fc_expires = rtmsg->rtmsg_info;
3249 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3250 cfg->fc_src_len = rtmsg->rtmsg_src_len;
3251 cfg->fc_flags = rtmsg->rtmsg_flags;
3252
5578689a 3253 cfg->fc_nlinfo.nl_net = net;
f1243c2d 3254
4e3fd7a0
AD
3255 cfg->fc_dst = rtmsg->rtmsg_dst;
3256 cfg->fc_src = rtmsg->rtmsg_src;
3257 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
3258}
3259
5578689a 3260int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3261{
86872cb5 3262 struct fib6_config cfg;
1da177e4
LT
3263 struct in6_rtmsg rtmsg;
3264 int err;
3265
67ba4152 3266 switch (cmd) {
1da177e4
LT
3267 case SIOCADDRT: /* Add a route */
3268 case SIOCDELRT: /* Delete a route */
af31f412 3269 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3270 return -EPERM;
3271 err = copy_from_user(&rtmsg, arg,
3272 sizeof(struct in6_rtmsg));
3273 if (err)
3274 return -EFAULT;
86872cb5 3275
5578689a 3276 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3277
1da177e4
LT
3278 rtnl_lock();
3279 switch (cmd) {
3280 case SIOCADDRT:
333c4301 3281 err = ip6_route_add(&cfg, NULL);
1da177e4
LT
3282 break;
3283 case SIOCDELRT:
333c4301 3284 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3285 break;
3286 default:
3287 err = -EINVAL;
3288 }
3289 rtnl_unlock();
3290
3291 return err;
3ff50b79 3292 }
1da177e4
LT
3293
3294 return -EINVAL;
3295}
3296
3297/*
3298 * Drop the packet on the floor
3299 */
3300
d5fdd6ba 3301static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3302{
612f09e8 3303 int type;
adf30907 3304 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3305 switch (ipstats_mib_noroutes) {
3306 case IPSTATS_MIB_INNOROUTES:
0660e03f 3307 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3308 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
3309 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3310 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3311 break;
3312 }
3313 /* FALLTHROUGH */
3314 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3315 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3316 ipstats_mib_noroutes);
612f09e8
YH
3317 break;
3318 }
3ffe533c 3319 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3320 kfree_skb(skb);
3321 return 0;
3322}
3323
9ce8ade0
TG
3324static int ip6_pkt_discard(struct sk_buff *skb)
3325{
612f09e8 3326 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3327}
3328
ede2059d 3329static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3330{
adf30907 3331 skb->dev = skb_dst(skb)->dev;
612f09e8 3332 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3333}
3334
9ce8ade0
TG
3335static int ip6_pkt_prohibit(struct sk_buff *skb)
3336{
612f09e8 3337 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3338}
3339
ede2059d 3340static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3341{
adf30907 3342 skb->dev = skb_dst(skb)->dev;
612f09e8 3343 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3344}
3345
1da177e4
LT
3346/*
3347 * Allocate a dst for local (unicast / anycast) address.
3348 */
3349
3350struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
3351 const struct in6_addr *addr,
8f031519 3352 bool anycast)
1da177e4 3353{
ca254490 3354 u32 tb_id;
c346dca1 3355 struct net *net = dev_net(idev->dev);
4832c30d 3356 struct net_device *dev = idev->dev;
5f02ce24
DA
3357 struct rt6_info *rt;
3358
5f02ce24 3359 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
a3300ef4 3360 if (!rt)
1da177e4
LT
3361 return ERR_PTR(-ENOMEM);
3362
1da177e4
LT
3363 in6_dev_hold(idev);
3364
11d53b49 3365 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
3366 rt->dst.input = ip6_input;
3367 rt->dst.output = ip6_output;
1da177e4 3368 rt->rt6i_idev = idev;
1da177e4 3369
94b5e0f9 3370 rt->rt6i_protocol = RTPROT_KERNEL;
1da177e4 3371 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
3372 if (anycast)
3373 rt->rt6i_flags |= RTF_ANYCAST;
3374 else
1da177e4 3375 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 3376
550bab42 3377 rt->rt6i_gateway = *addr;
4e3fd7a0 3378 rt->rt6i_dst.addr = *addr;
1da177e4 3379 rt->rt6i_dst.plen = 128;
ca254490
DA
3380 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3381 rt->rt6i_table = fib6_get_table(net, tb_id);
1da177e4 3382
1da177e4
LT
3383 return rt;
3384}
3385
c3968a85
DW
3386/* remove deleted ip from prefsrc entries */
/* Walker argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace, used to find ip6_null_entry */
	struct in6_addr *addr;		/* preferred-source address to remove */
};
3392
3393static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
3394{
3395 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3396 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3397 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3398
d1918542 3399 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
3400 rt != net->ipv6.ip6_null_entry &&
3401 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
60006a48 3402 spin_lock_bh(&rt6_exception_lock);
c3968a85
DW
3403 /* remove prefsrc entry */
3404 rt->rt6i_prefsrc.plen = 0;
60006a48
WW
3405 /* need to update cache as well */
3406 rt6_exceptions_remove_prefsrc(rt);
3407 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3408 }
3409 return 0;
3410}
3411
3412void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3413{
3414 struct net *net = dev_net(ifp->idev->dev);
3415 struct arg_dev_net_ip adni = {
3416 .dev = ifp->idev->dev,
3417 .net = net,
3418 .addr = &ifp->addr,
3419 };
0c3584d5 3420 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3421}
3422
be7a010d 3423#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
be7a010d
DJ
3424
3425/* Remove routers and update dst entries when gateway turn into host. */
3426static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
3427{
3428 struct in6_addr *gateway = (struct in6_addr *)arg;
3429
2b760fcf
WW
3430 if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
3431 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
be7a010d
DJ
3432 return -1;
3433 }
b16cb459
WW
3434
3435 /* Further clean up cached routes in exception table.
3436 * This is needed because cached route may have a different
3437 * gateway than its 'parent' in the case of an ip redirect.
3438 */
3439 rt6_exceptions_clean_tohost(rt, gateway);
3440
be7a010d
DJ
3441 return 0;
3442}
3443
3444void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3445{
3446 fib6_clean_all(net, fib6_clean_tohost, gateway);
3447}
3448
8ed67789
DL
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL matches any */
	struct net *net;		/* namespace, used to find ip6_null_entry */
};
3453
a1a22c12 3454/* called with write lock held for table with rt */
1da177e4
LT
3455static int fib6_ifdown(struct rt6_info *rt, void *arg)
3456{
bc3ef660 3457 const struct arg_dev_net *adn = arg;
3458 const struct net_device *dev = adn->dev;
8ed67789 3459
d1918542 3460 if ((rt->dst.dev == dev || !dev) &&
a1a22c12
DA
3461 rt != adn->net->ipv6.ip6_null_entry &&
3462 (rt->rt6i_nsiblings == 0 ||
8397ed36 3463 (dev && netdev_unregistering(dev)) ||
a1a22c12 3464 !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
1da177e4 3465 return -1;
c159d30c 3466
1da177e4
LT
3467 return 0;
3468}
3469
f3db4851 3470void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 3471{
8ed67789
DL
3472 struct arg_dev_net adn = {
3473 .dev = dev,
3474 .net = net,
3475 };
3476
0c3584d5 3477 fib6_clean_all(net, fib6_ifdown, &adn);
e332bc67
EB
3478 if (dev)
3479 rt6_uncached_list_flush_dev(net, dev);
1da177e4
LT
3480}
3481
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
3486
3487static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
3488{
3489 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
3490 struct inet6_dev *idev;
3491
3492 /* In IPv6 pmtu discovery is not optional,
3493 so that RTAX_MTU lock cannot disable it.
3494 We still use this lock to block changes
3495 caused by addrconf/ndisc.
3496 */
3497
3498 idev = __in6_dev_get(arg->dev);
38308473 3499 if (!idev)
1da177e4
LT
3500 return 0;
3501
3502 /* For administrative MTU increase, there is no way to discover
3503 IPv6 PMTU increase, so PMTU increase should be updated here.
3504 Since RFC 1981 doesn't include administrative MTU increase
3505 update PMTU increase is a MUST. (i.e. jumbo frame)
3506 */
3507 /*
3508 If new MTU is less than route PMTU, this new MTU will be the
3509 lowest MTU in the path, update the route PMTU to reflect PMTU
3510 decreases; if new MTU is greater than route PMTU, and the
3511 old MTU is the lowest MTU in the path, update the route PMTU
3512 to reflect the increase. In this case if the other nodes' MTU
3513 also have the lowest MTU, TOO BIG MESSAGE will be lead to
67c408cf 3514 PMTU discovery.
1da177e4 3515 */
d1918542 3516 if (rt->dst.dev == arg->dev &&
fb56be83 3517 dst_metric_raw(&rt->dst, RTAX_MTU) &&
4b32b5ad 3518 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
f5bbe7ee 3519 spin_lock_bh(&rt6_exception_lock);
2b760fcf
WW
3520 if (dst_mtu(&rt->dst) >= arg->mtu ||
3521 (dst_mtu(&rt->dst) < arg->mtu &&
3522 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
4b32b5ad
MKL
3523 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
3524 }
f5bbe7ee
WW
3525 rt6_exceptions_update_pmtu(rt, arg->mtu);
3526 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 3527 }
1da177e4
LT
3528 return 0;
3529}
3530
95c96174 3531void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 3532{
c71099ac
TG
3533 struct rt6_mtu_change_arg arg = {
3534 .dev = dev,
3535 .mtu = mtu,
3536 };
1da177e4 3537
0c3584d5 3538 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
3539}
3540
ef7c79ed 3541static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 3542 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 3543 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 3544 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
3545 [RTA_PRIORITY] = { .type = NLA_U32 },
3546 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 3547 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 3548 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
3549 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
3550 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 3551 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 3552 [RTA_UID] = { .type = NLA_U32 },
3b45a410 3553 [RTA_MARK] = { .type = NLA_U32 },
86872cb5
TG
3554};
3555
3556static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
3557 struct fib6_config *cfg,
3558 struct netlink_ext_ack *extack)
1da177e4 3559{
86872cb5
TG
3560 struct rtmsg *rtm;
3561 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 3562 unsigned int pref;
86872cb5 3563 int err;
1da177e4 3564
fceb6435
JB
3565 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
3566 NULL);
86872cb5
TG
3567 if (err < 0)
3568 goto errout;
1da177e4 3569
86872cb5
TG
3570 err = -EINVAL;
3571 rtm = nlmsg_data(nlh);
3572 memset(cfg, 0, sizeof(*cfg));
3573
3574 cfg->fc_table = rtm->rtm_table;
3575 cfg->fc_dst_len = rtm->rtm_dst_len;
3576 cfg->fc_src_len = rtm->rtm_src_len;
3577 cfg->fc_flags = RTF_UP;
3578 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 3579 cfg->fc_type = rtm->rtm_type;
86872cb5 3580
ef2c7d7b
ND
3581 if (rtm->rtm_type == RTN_UNREACHABLE ||
3582 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
3583 rtm->rtm_type == RTN_PROHIBIT ||
3584 rtm->rtm_type == RTN_THROW)
86872cb5
TG
3585 cfg->fc_flags |= RTF_REJECT;
3586
ab79ad14
3587 if (rtm->rtm_type == RTN_LOCAL)
3588 cfg->fc_flags |= RTF_LOCAL;
3589
1f56a01f
MKL
3590 if (rtm->rtm_flags & RTM_F_CLONED)
3591 cfg->fc_flags |= RTF_CACHE;
3592
15e47304 3593 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 3594 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 3595 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
3596
3597 if (tb[RTA_GATEWAY]) {
67b61f6c 3598 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 3599 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 3600 }
86872cb5
TG
3601
3602 if (tb[RTA_DST]) {
3603 int plen = (rtm->rtm_dst_len + 7) >> 3;
3604
3605 if (nla_len(tb[RTA_DST]) < plen)
3606 goto errout;
3607
3608 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 3609 }
86872cb5
TG
3610
3611 if (tb[RTA_SRC]) {
3612 int plen = (rtm->rtm_src_len + 7) >> 3;
3613
3614 if (nla_len(tb[RTA_SRC]) < plen)
3615 goto errout;
3616
3617 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 3618 }
86872cb5 3619
c3968a85 3620 if (tb[RTA_PREFSRC])
67b61f6c 3621 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 3622
86872cb5
TG
3623 if (tb[RTA_OIF])
3624 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
3625
3626 if (tb[RTA_PRIORITY])
3627 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
3628
3629 if (tb[RTA_METRICS]) {
3630 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
3631 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 3632 }
86872cb5
TG
3633
3634 if (tb[RTA_TABLE])
3635 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
3636
51ebd318
ND
3637 if (tb[RTA_MULTIPATH]) {
3638 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
3639 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
3640
3641 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 3642 cfg->fc_mp_len, extack);
9ed59592
DA
3643 if (err < 0)
3644 goto errout;
51ebd318
ND
3645 }
3646
c78ba6d6
LR
3647 if (tb[RTA_PREF]) {
3648 pref = nla_get_u8(tb[RTA_PREF]);
3649 if (pref != ICMPV6_ROUTER_PREF_LOW &&
3650 pref != ICMPV6_ROUTER_PREF_HIGH)
3651 pref = ICMPV6_ROUTER_PREF_MEDIUM;
3652 cfg->fc_flags |= RTF_PREF(pref);
3653 }
3654
19e42e45
RP
3655 if (tb[RTA_ENCAP])
3656 cfg->fc_encap = tb[RTA_ENCAP];
3657
9ed59592 3658 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
3659 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
3660
c255bd68 3661 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
3662 if (err < 0)
3663 goto errout;
3664 }
3665
32bc201e
XL
3666 if (tb[RTA_EXPIRES]) {
3667 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
3668
3669 if (addrconf_finite_timeout(timeout)) {
3670 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
3671 cfg->fc_flags |= RTF_EXPIRES;
3672 }
3673 }
3674
86872cb5
TG
3675 err = 0;
3676errout:
3677 return err;
1da177e4
LT
3678}
3679
6b9ea5a6
RP
3680struct rt6_nh {
3681 struct rt6_info *rt6_info;
3682 struct fib6_config r_cfg;
3683 struct mx6_config mxc;
3684 struct list_head next;
3685};
3686
3687static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
3688{
3689 struct rt6_nh *nh;
3690
3691 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 3692 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
3693 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
3694 nh->r_cfg.fc_ifindex);
3695 }
3696}
3697
3698static int ip6_route_info_append(struct list_head *rt6_nh_list,
3699 struct rt6_info *rt, struct fib6_config *r_cfg)
3700{
3701 struct rt6_nh *nh;
6b9ea5a6
RP
3702 int err = -EEXIST;
3703
3704 list_for_each_entry(nh, rt6_nh_list, next) {
3705 /* check if rt6_info already exists */
f06b7549 3706 if (rt6_duplicate_nexthop(nh->rt6_info, rt))
6b9ea5a6
RP
3707 return err;
3708 }
3709
3710 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
3711 if (!nh)
3712 return -ENOMEM;
3713 nh->rt6_info = rt;
3714 err = ip6_convert_metrics(&nh->mxc, r_cfg);
3715 if (err) {
3716 kfree(nh);
3717 return err;
3718 }
3719 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
3720 list_add_tail(&nh->next, rt6_nh_list);
3721
3722 return 0;
3723}
3724
3b1137fe
DA
3725static void ip6_route_mpath_notify(struct rt6_info *rt,
3726 struct rt6_info *rt_last,
3727 struct nl_info *info,
3728 __u16 nlflags)
3729{
3730 /* if this is an APPEND route, then rt points to the first route
3731 * inserted and rt_last points to last route inserted. Userspace
3732 * wants a consistent dump of the route which starts at the first
3733 * nexthop. Since sibling routes are always added at the end of
3734 * the list, find the first sibling of the last route appended
3735 */
3736 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
3737 rt = list_first_entry(&rt_last->rt6i_siblings,
3738 struct rt6_info,
3739 rt6i_siblings);
3740 }
3741
3742 if (rt)
3743 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
3744}
3745
333c4301
DA
3746static int ip6_route_multipath_add(struct fib6_config *cfg,
3747 struct netlink_ext_ack *extack)
51ebd318 3748{
3b1137fe
DA
3749 struct rt6_info *rt_notif = NULL, *rt_last = NULL;
3750 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
3751 struct fib6_config r_cfg;
3752 struct rtnexthop *rtnh;
6b9ea5a6
RP
3753 struct rt6_info *rt;
3754 struct rt6_nh *err_nh;
3755 struct rt6_nh *nh, *nh_safe;
3b1137fe 3756 __u16 nlflags;
51ebd318
ND
3757 int remaining;
3758 int attrlen;
6b9ea5a6
RP
3759 int err = 1;
3760 int nhn = 0;
3761 int replace = (cfg->fc_nlinfo.nlh &&
3762 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
3763 LIST_HEAD(rt6_nh_list);
51ebd318 3764
3b1137fe
DA
3765 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
3766 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
3767 nlflags |= NLM_F_APPEND;
3768
35f1b4e9 3769 remaining = cfg->fc_mp_len;
51ebd318 3770 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 3771
6b9ea5a6
RP
3772 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
3773 * rt6_info structs per nexthop
3774 */
51ebd318
ND
3775 while (rtnh_ok(rtnh, remaining)) {
3776 memcpy(&r_cfg, cfg, sizeof(*cfg));
3777 if (rtnh->rtnh_ifindex)
3778 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3779
3780 attrlen = rtnh_attrlen(rtnh);
3781 if (attrlen > 0) {
3782 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3783
3784 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3785 if (nla) {
67b61f6c 3786 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
3787 r_cfg.fc_flags |= RTF_GATEWAY;
3788 }
19e42e45
RP
3789 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
3790 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
3791 if (nla)
3792 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 3793 }
6b9ea5a6 3794
333c4301 3795 rt = ip6_route_info_create(&r_cfg, extack);
8c5b83f0
RP
3796 if (IS_ERR(rt)) {
3797 err = PTR_ERR(rt);
3798 rt = NULL;
6b9ea5a6 3799 goto cleanup;
8c5b83f0 3800 }
6b9ea5a6
RP
3801
3802 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
51ebd318 3803 if (err) {
587fea74 3804 dst_release_immediate(&rt->dst);
6b9ea5a6
RP
3805 goto cleanup;
3806 }
3807
3808 rtnh = rtnh_next(rtnh, &remaining);
3809 }
3810
3b1137fe
DA
3811 /* for add and replace send one notification with all nexthops.
3812 * Skip the notification in fib6_add_rt2node and send one with
3813 * the full route when done
3814 */
3815 info->skip_notify = 1;
3816
6b9ea5a6
RP
3817 err_nh = NULL;
3818 list_for_each_entry(nh, &rt6_nh_list, next) {
3b1137fe 3819 rt_last = nh->rt6_info;
333c4301 3820 err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
3b1137fe
DA
3821 /* save reference to first route for notification */
3822 if (!rt_notif && !err)
3823 rt_notif = nh->rt6_info;
3824
6b9ea5a6
RP
3825 /* nh->rt6_info is used or freed at this point, reset to NULL*/
3826 nh->rt6_info = NULL;
3827 if (err) {
3828 if (replace && nhn)
3829 ip6_print_replace_route_err(&rt6_nh_list);
3830 err_nh = nh;
3831 goto add_errout;
51ebd318 3832 }
6b9ea5a6 3833
1a72418b 3834 /* Because each route is added like a single route we remove
27596472
MK
3835 * these flags after the first nexthop: if there is a collision,
3836 * we have already failed to add the first nexthop:
3837 * fib6_add_rt2node() has rejected it; when replacing, old
3838 * nexthops have been replaced by first new, the rest should
3839 * be added to it.
1a72418b 3840 */
27596472
MK
3841 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3842 NLM_F_REPLACE);
6b9ea5a6
RP
3843 nhn++;
3844 }
3845
3b1137fe
DA
3846 /* success ... tell user about new route */
3847 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
3848 goto cleanup;
3849
3850add_errout:
3b1137fe
DA
3851 /* send notification for routes that were added so that
3852 * the delete notifications sent by ip6_route_del are
3853 * coherent
3854 */
3855 if (rt_notif)
3856 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3857
6b9ea5a6
RP
3858 /* Delete routes that were already added */
3859 list_for_each_entry(nh, &rt6_nh_list, next) {
3860 if (err_nh == nh)
3861 break;
333c4301 3862 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
3863 }
3864
3865cleanup:
3866 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
587fea74
WW
3867 if (nh->rt6_info)
3868 dst_release_immediate(&nh->rt6_info->dst);
52fe51f8 3869 kfree(nh->mxc.mx);
6b9ea5a6
RP
3870 list_del(&nh->next);
3871 kfree(nh);
3872 }
3873
3874 return err;
3875}
3876
333c4301
DA
3877static int ip6_route_multipath_del(struct fib6_config *cfg,
3878 struct netlink_ext_ack *extack)
6b9ea5a6
RP
3879{
3880 struct fib6_config r_cfg;
3881 struct rtnexthop *rtnh;
3882 int remaining;
3883 int attrlen;
3884 int err = 1, last_err = 0;
3885
3886 remaining = cfg->fc_mp_len;
3887 rtnh = (struct rtnexthop *)cfg->fc_mp;
3888
3889 /* Parse a Multipath Entry */
3890 while (rtnh_ok(rtnh, remaining)) {
3891 memcpy(&r_cfg, cfg, sizeof(*cfg));
3892 if (rtnh->rtnh_ifindex)
3893 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3894
3895 attrlen = rtnh_attrlen(rtnh);
3896 if (attrlen > 0) {
3897 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3898
3899 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3900 if (nla) {
3901 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3902 r_cfg.fc_flags |= RTF_GATEWAY;
3903 }
3904 }
333c4301 3905 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
3906 if (err)
3907 last_err = err;
3908
51ebd318
ND
3909 rtnh = rtnh_next(rtnh, &remaining);
3910 }
3911
3912 return last_err;
3913}
3914
c21ef3e3
DA
3915static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3916 struct netlink_ext_ack *extack)
1da177e4 3917{
86872cb5
TG
3918 struct fib6_config cfg;
3919 int err;
1da177e4 3920
333c4301 3921 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
3922 if (err < 0)
3923 return err;
3924
51ebd318 3925 if (cfg.fc_mp)
333c4301 3926 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
3927 else {
3928 cfg.fc_delete_all_nh = 1;
333c4301 3929 return ip6_route_del(&cfg, extack);
0ae81335 3930 }
1da177e4
LT
3931}
3932
c21ef3e3
DA
3933static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3934 struct netlink_ext_ack *extack)
1da177e4 3935{
86872cb5
TG
3936 struct fib6_config cfg;
3937 int err;
1da177e4 3938
333c4301 3939 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
3940 if (err < 0)
3941 return err;
3942
51ebd318 3943 if (cfg.fc_mp)
333c4301 3944 return ip6_route_multipath_add(&cfg, extack);
51ebd318 3945 else
333c4301 3946 return ip6_route_add(&cfg, extack);
1da177e4
LT
3947}
3948
beb1afac 3949static size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f 3950{
beb1afac
DA
3951 int nexthop_len = 0;
3952
3953 if (rt->rt6i_nsiblings) {
3954 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
3955 + NLA_ALIGN(sizeof(struct rtnexthop))
3956 + nla_total_size(16) /* RTA_GATEWAY */
beb1afac
DA
3957 + lwtunnel_get_encap_size(rt->dst.lwtstate);
3958
3959 nexthop_len *= rt->rt6i_nsiblings;
3960 }
3961
339bf98f
TG
3962 return NLMSG_ALIGN(sizeof(struct rtmsg))
3963 + nla_total_size(16) /* RTA_SRC */
3964 + nla_total_size(16) /* RTA_DST */
3965 + nla_total_size(16) /* RTA_GATEWAY */
3966 + nla_total_size(16) /* RTA_PREFSRC */
3967 + nla_total_size(4) /* RTA_TABLE */
3968 + nla_total_size(4) /* RTA_IIF */
3969 + nla_total_size(4) /* RTA_OIF */
3970 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 3971 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 3972 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 3973 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 3974 + nla_total_size(1) /* RTA_PREF */
beb1afac
DA
3975 + lwtunnel_get_encap_size(rt->dst.lwtstate)
3976 + nexthop_len;
3977}
3978
3979static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
5be083ce 3980 unsigned int *flags, bool skip_oif)
beb1afac
DA
3981{
3982 if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
3983 *flags |= RTNH_F_LINKDOWN;
3984 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3985 *flags |= RTNH_F_DEAD;
3986 }
3987
3988 if (rt->rt6i_flags & RTF_GATEWAY) {
3989 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3990 goto nla_put_failure;
3991 }
3992
fe400799 3993 if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
3994 *flags |= RTNH_F_OFFLOAD;
3995
5be083ce
DA
3996 /* not needed for multipath encoding b/c it has a rtnexthop struct */
3997 if (!skip_oif && rt->dst.dev &&
beb1afac
DA
3998 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3999 goto nla_put_failure;
4000
4001 if (rt->dst.lwtstate &&
4002 lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
4003 goto nla_put_failure;
4004
4005 return 0;
4006
4007nla_put_failure:
4008 return -EMSGSIZE;
4009}
4010
5be083ce 4011/* add multipath next hop */
beb1afac
DA
4012static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
4013{
4014 struct rtnexthop *rtnh;
4015 unsigned int flags = 0;
4016
4017 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4018 if (!rtnh)
4019 goto nla_put_failure;
4020
4021 rtnh->rtnh_hops = 0;
4022 rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
4023
5be083ce 4024 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
4025 goto nla_put_failure;
4026
4027 rtnh->rtnh_flags = flags;
4028
4029 /* length of rtnetlink header + attributes */
4030 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4031
4032 return 0;
4033
4034nla_put_failure:
4035 return -EMSGSIZE;
339bf98f
TG
4036}
4037
191cd582
BH
4038static int rt6_fill_node(struct net *net,
4039 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 4040 struct in6_addr *dst, struct in6_addr *src,
15e47304 4041 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4042 unsigned int flags)
1da177e4 4043{
4b32b5ad 4044 u32 metrics[RTAX_MAX];
1da177e4 4045 struct rtmsg *rtm;
2d7202bf 4046 struct nlmsghdr *nlh;
e3703b3d 4047 long expires;
9e762a4a 4048 u32 table;
1da177e4 4049
15e47304 4050 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4051 if (!nlh)
26932566 4052 return -EMSGSIZE;
2d7202bf
TG
4053
4054 rtm = nlmsg_data(nlh);
1da177e4
LT
4055 rtm->rtm_family = AF_INET6;
4056 rtm->rtm_dst_len = rt->rt6i_dst.plen;
4057 rtm->rtm_src_len = rt->rt6i_src.plen;
4058 rtm->rtm_tos = 0;
c71099ac 4059 if (rt->rt6i_table)
9e762a4a 4060 table = rt->rt6i_table->tb6_id;
c71099ac 4061 else
9e762a4a
PM
4062 table = RT6_TABLE_UNSPEC;
4063 rtm->rtm_table = table;
c78679e8
DM
4064 if (nla_put_u32(skb, RTA_TABLE, table))
4065 goto nla_put_failure;
ef2c7d7b
ND
4066 if (rt->rt6i_flags & RTF_REJECT) {
4067 switch (rt->dst.error) {
4068 case -EINVAL:
4069 rtm->rtm_type = RTN_BLACKHOLE;
4070 break;
4071 case -EACCES:
4072 rtm->rtm_type = RTN_PROHIBIT;
4073 break;
b4949ab2
ND
4074 case -EAGAIN:
4075 rtm->rtm_type = RTN_THROW;
4076 break;
ef2c7d7b
ND
4077 default:
4078 rtm->rtm_type = RTN_UNREACHABLE;
4079 break;
4080 }
4081 }
38308473 4082 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 4083 rtm->rtm_type = RTN_LOCAL;
4ee39733
DA
4084 else if (rt->rt6i_flags & RTF_ANYCAST)
4085 rtm->rtm_type = RTN_ANYCAST;
d1918542 4086 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
4087 rtm->rtm_type = RTN_LOCAL;
4088 else
4089 rtm->rtm_type = RTN_UNICAST;
4090 rtm->rtm_flags = 0;
4091 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4092 rtm->rtm_protocol = rt->rt6i_protocol;
1da177e4 4093
38308473 4094 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
4095 rtm->rtm_flags |= RTM_F_CLONED;
4096
4097 if (dst) {
930345ea 4098 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 4099 goto nla_put_failure;
1ab1457c 4100 rtm->rtm_dst_len = 128;
1da177e4 4101 } else if (rtm->rtm_dst_len)
930345ea 4102 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 4103 goto nla_put_failure;
1da177e4
LT
4104#ifdef CONFIG_IPV6_SUBTREES
4105 if (src) {
930345ea 4106 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4107 goto nla_put_failure;
1ab1457c 4108 rtm->rtm_src_len = 128;
c78679e8 4109 } else if (rtm->rtm_src_len &&
930345ea 4110 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 4111 goto nla_put_failure;
1da177e4 4112#endif
7bc570c8
YH
4113 if (iif) {
4114#ifdef CONFIG_IPV6_MROUTE
4115 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
fd61c6ba
DA
4116 int err = ip6mr_get_route(net, skb, rtm, portid);
4117
4118 if (err == 0)
4119 return 0;
4120 if (err < 0)
4121 goto nla_put_failure;
7bc570c8
YH
4122 } else
4123#endif
c78679e8
DM
4124 if (nla_put_u32(skb, RTA_IIF, iif))
4125 goto nla_put_failure;
7bc570c8 4126 } else if (dst) {
1da177e4 4127 struct in6_addr saddr_buf;
c78679e8 4128 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 4129 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4130 goto nla_put_failure;
1da177e4 4131 }
2d7202bf 4132
c3968a85
DW
4133 if (rt->rt6i_prefsrc.plen) {
4134 struct in6_addr saddr_buf;
4e3fd7a0 4135 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 4136 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4137 goto nla_put_failure;
c3968a85
DW
4138 }
4139
4b32b5ad
MKL
4140 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
4141 if (rt->rt6i_pmtu)
4142 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
4143 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
4144 goto nla_put_failure;
4145
c78679e8
DM
4146 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
4147 goto nla_put_failure;
8253947e 4148
beb1afac
DA
4149 /* For multipath routes, walk the siblings list and add
4150 * each as a nexthop within RTA_MULTIPATH.
4151 */
4152 if (rt->rt6i_nsiblings) {
4153 struct rt6_info *sibling, *next_sibling;
4154 struct nlattr *mp;
4155
4156 mp = nla_nest_start(skb, RTA_MULTIPATH);
4157 if (!mp)
4158 goto nla_put_failure;
4159
4160 if (rt6_add_nexthop(skb, rt) < 0)
4161 goto nla_put_failure;
4162
4163 list_for_each_entry_safe(sibling, next_sibling,
4164 &rt->rt6i_siblings, rt6i_siblings) {
4165 if (rt6_add_nexthop(skb, sibling) < 0)
4166 goto nla_put_failure;
4167 }
4168
4169 nla_nest_end(skb, mp);
4170 } else {
5be083ce 4171 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
4172 goto nla_put_failure;
4173 }
4174
8253947e 4175 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 4176
87a50699 4177 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 4178 goto nla_put_failure;
2d7202bf 4179
c78ba6d6
LR
4180 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
4181 goto nla_put_failure;
4182
19e42e45 4183
053c095a
JB
4184 nlmsg_end(skb, nlh);
4185 return 0;
2d7202bf
TG
4186
4187nla_put_failure:
26932566
PM
4188 nlmsg_cancel(skb, nlh);
4189 return -EMSGSIZE;
1da177e4
LT
4190}
4191
1b43af54 4192int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
4193{
4194 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1f17e2f2
DA
4195 struct net *net = arg->net;
4196
4197 if (rt == net->ipv6.ip6_null_entry)
4198 return 0;
1da177e4 4199
2d7202bf
TG
4200 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4201 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
f8cfe2ce
DA
4202
4203 /* user wants prefix routes only */
4204 if (rtm->rtm_flags & RTM_F_PREFIX &&
4205 !(rt->rt6i_flags & RTF_PREFIX_RT)) {
4206 /* success since this is not a prefix route */
4207 return 1;
4208 }
4209 }
1da177e4 4210
1f17e2f2 4211 return rt6_fill_node(net,
191cd582 4212 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 4213 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
f8cfe2ce 4214 NLM_F_MULTI);
1da177e4
LT
4215}
4216
c21ef3e3
DA
4217static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4218 struct netlink_ext_ack *extack)
1da177e4 4219{
3b1e0a65 4220 struct net *net = sock_net(in_skb->sk);
ab364a6f 4221 struct nlattr *tb[RTA_MAX+1];
18c3a61c
RP
4222 int err, iif = 0, oif = 0;
4223 struct dst_entry *dst;
ab364a6f 4224 struct rt6_info *rt;
1da177e4 4225 struct sk_buff *skb;
ab364a6f 4226 struct rtmsg *rtm;
4c9483b2 4227 struct flowi6 fl6;
18c3a61c 4228 bool fibmatch;
1da177e4 4229
fceb6435 4230 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 4231 extack);
ab364a6f
TG
4232 if (err < 0)
4233 goto errout;
1da177e4 4234
ab364a6f 4235 err = -EINVAL;
4c9483b2 4236 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
4237 rtm = nlmsg_data(nlh);
4238 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4239 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4240
ab364a6f
TG
4241 if (tb[RTA_SRC]) {
4242 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4243 goto errout;
4244
4e3fd7a0 4245 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4246 }
4247
4248 if (tb[RTA_DST]) {
4249 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4250 goto errout;
4251
4e3fd7a0 4252 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4253 }
4254
4255 if (tb[RTA_IIF])
4256 iif = nla_get_u32(tb[RTA_IIF]);
4257
4258 if (tb[RTA_OIF])
72331bc0 4259 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4260
2e47b291
LC
4261 if (tb[RTA_MARK])
4262 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4263
622ec2c9
LC
4264 if (tb[RTA_UID])
4265 fl6.flowi6_uid = make_kuid(current_user_ns(),
4266 nla_get_u32(tb[RTA_UID]));
4267 else
4268 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4269
1da177e4
LT
4270 if (iif) {
4271 struct net_device *dev;
72331bc0
SL
4272 int flags = 0;
4273
121622db
FW
4274 rcu_read_lock();
4275
4276 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4277 if (!dev) {
121622db 4278 rcu_read_unlock();
1da177e4 4279 err = -ENODEV;
ab364a6f 4280 goto errout;
1da177e4 4281 }
72331bc0
SL
4282
4283 fl6.flowi6_iif = iif;
4284
4285 if (!ipv6_addr_any(&fl6.saddr))
4286 flags |= RT6_LOOKUP_F_HAS_SADDR;
4287
18c3a61c
RP
4288 if (!fibmatch)
4289 dst = ip6_route_input_lookup(net, dev, &fl6, flags);
401481e0
AB
4290 else
4291 dst = ip6_route_lookup(net, &fl6, 0);
121622db
FW
4292
4293 rcu_read_unlock();
72331bc0
SL
4294 } else {
4295 fl6.flowi6_oif = oif;
4296
18c3a61c
RP
4297 if (!fibmatch)
4298 dst = ip6_route_output(net, NULL, &fl6);
401481e0
AB
4299 else
4300 dst = ip6_route_lookup(net, &fl6, 0);
18c3a61c
RP
4301 }
4302
18c3a61c
RP
4303
4304 rt = container_of(dst, struct rt6_info, dst);
4305 if (rt->dst.error) {
4306 err = rt->dst.error;
4307 ip6_rt_put(rt);
4308 goto errout;
1da177e4
LT
4309 }
4310
9d6acb3b
WC
4311 if (rt == net->ipv6.ip6_null_entry) {
4312 err = rt->dst.error;
4313 ip6_rt_put(rt);
4314 goto errout;
4315 }
4316
ab364a6f 4317 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4318 if (!skb) {
94e187c0 4319 ip6_rt_put(rt);
ab364a6f
TG
4320 err = -ENOBUFS;
4321 goto errout;
4322 }
1da177e4 4323
d8d1f30b 4324 skb_dst_set(skb, &rt->dst);
18c3a61c
RP
4325 if (fibmatch)
4326 err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
4327 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4328 nlh->nlmsg_seq, 0);
4329 else
4330 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
4331 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4332 nlh->nlmsg_seq, 0);
1da177e4 4333 if (err < 0) {
ab364a6f
TG
4334 kfree_skb(skb);
4335 goto errout;
1da177e4
LT
4336 }
4337
15e47304 4338 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 4339errout:
1da177e4 4340 return err;
1da177e4
LT
4341}
4342
37a1d361
RP
4343void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
4344 unsigned int nlm_flags)
1da177e4
LT
4345{
4346 struct sk_buff *skb;
5578689a 4347 struct net *net = info->nl_net;
528c4ceb
DL
4348 u32 seq;
4349 int err;
4350
4351 err = -ENOBUFS;
38308473 4352 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 4353
19e42e45 4354 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 4355 if (!skb)
21713ebc
TG
4356 goto errout;
4357
191cd582 4358 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
f8cfe2ce 4359 event, info->portid, seq, nlm_flags);
26932566
PM
4360 if (err < 0) {
4361 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4362 WARN_ON(err == -EMSGSIZE);
4363 kfree_skb(skb);
4364 goto errout;
4365 }
15e47304 4366 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
4367 info->nlh, gfp_any());
4368 return;
21713ebc
TG
4369errout:
4370 if (err < 0)
5578689a 4371 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
4372}
4373
8ed67789 4374static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 4375 unsigned long event, void *ptr)
8ed67789 4376{
351638e7 4377 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 4378 struct net *net = dev_net(dev);
8ed67789 4379
242d3a49
WC
4380 if (!(dev->flags & IFF_LOOPBACK))
4381 return NOTIFY_OK;
4382
4383 if (event == NETDEV_REGISTER) {
d8d1f30b 4384 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
4385 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
4386#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 4387 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 4388 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 4389 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 4390 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 4391#endif
76da0704
WC
4392 } else if (event == NETDEV_UNREGISTER &&
4393 dev->reg_state != NETREG_UNREGISTERED) {
4394 /* NETDEV_UNREGISTER could be fired for multiple times by
4395 * netdev_wait_allrefs(). Make sure we only call this once.
4396 */
12d94a80 4397 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 4398#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
4399 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
4400 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
4401#endif
4402 }
4403
4404 return NOTIFY_OK;
4405}
4406
1da177e4
LT
4407/*
4408 * /proc
4409 */
4410
4411#ifdef CONFIG_PROC_FS
4412
33120b30
AD
4413static const struct file_operations ipv6_route_proc_fops = {
4414 .owner = THIS_MODULE,
4415 .open = ipv6_route_open,
4416 .read = seq_read,
4417 .llseek = seq_lseek,
8d2ca1d7 4418 .release = seq_release_net,
33120b30
AD
4419};
4420
1da177e4
LT
4421static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4422{
69ddb805 4423 struct net *net = (struct net *)seq->private;
1da177e4 4424 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
4425 net->ipv6.rt6_stats->fib_nodes,
4426 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 4427 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
4428 net->ipv6.rt6_stats->fib_rt_entries,
4429 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 4430 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 4431 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
4432
4433 return 0;
4434}
4435
4436static int rt6_stats_seq_open(struct inode *inode, struct file *file)
4437{
de05c557 4438 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
4439}
4440
9a32144e 4441static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
4442 .owner = THIS_MODULE,
4443 .open = rt6_stats_seq_open,
4444 .read = seq_read,
4445 .llseek = seq_lseek,
b6fcbdb4 4446 .release = single_release_net,
1da177e4
LT
4447};
4448#endif /* CONFIG_PROC_FS */
4449
4450#ifdef CONFIG_SYSCTL
4451
1da177e4 4452static
fe2c6338 4453int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
4454 void __user *buffer, size_t *lenp, loff_t *ppos)
4455{
c486da34
LAG
4456 struct net *net;
4457 int delay;
4458 if (!write)
1da177e4 4459 return -EINVAL;
c486da34
LAG
4460
4461 net = (struct net *)ctl->extra1;
4462 delay = net->ipv6.sysctl.flush_delay;
4463 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 4464 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 4465 return 0;
1da177e4
LT
4466}
4467
fe2c6338 4468struct ctl_table ipv6_route_table_template[] = {
1ab1457c 4469 {
1da177e4 4470 .procname = "flush",
4990509f 4471 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 4472 .maxlen = sizeof(int),
89c8b3a1 4473 .mode = 0200,
6d9f239a 4474 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
4475 },
4476 {
1da177e4 4477 .procname = "gc_thresh",
9a7ec3a9 4478 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
4479 .maxlen = sizeof(int),
4480 .mode = 0644,
6d9f239a 4481 .proc_handler = proc_dointvec,
1da177e4
LT
4482 },
4483 {
1da177e4 4484 .procname = "max_size",
4990509f 4485 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
4486 .maxlen = sizeof(int),
4487 .mode = 0644,
6d9f239a 4488 .proc_handler = proc_dointvec,
1da177e4
LT
4489 },
4490 {
1da177e4 4491 .procname = "gc_min_interval",
4990509f 4492 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
4493 .maxlen = sizeof(int),
4494 .mode = 0644,
6d9f239a 4495 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4496 },
4497 {
1da177e4 4498 .procname = "gc_timeout",
4990509f 4499 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
4500 .maxlen = sizeof(int),
4501 .mode = 0644,
6d9f239a 4502 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4503 },
4504 {
1da177e4 4505 .procname = "gc_interval",
4990509f 4506 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
4507 .maxlen = sizeof(int),
4508 .mode = 0644,
6d9f239a 4509 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4510 },
4511 {
1da177e4 4512 .procname = "gc_elasticity",
4990509f 4513 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
4514 .maxlen = sizeof(int),
4515 .mode = 0644,
f3d3f616 4516 .proc_handler = proc_dointvec,
1da177e4
LT
4517 },
4518 {
1da177e4 4519 .procname = "mtu_expires",
4990509f 4520 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
4521 .maxlen = sizeof(int),
4522 .mode = 0644,
6d9f239a 4523 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4524 },
4525 {
1da177e4 4526 .procname = "min_adv_mss",
4990509f 4527 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
4528 .maxlen = sizeof(int),
4529 .mode = 0644,
f3d3f616 4530 .proc_handler = proc_dointvec,
1da177e4
LT
4531 },
4532 {
1da177e4 4533 .procname = "gc_min_interval_ms",
4990509f 4534 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
4535 .maxlen = sizeof(int),
4536 .mode = 0644,
6d9f239a 4537 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 4538 },
f8572d8f 4539 { }
1da177e4
LT
4540};
4541
2c8c1e72 4542struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
4543{
4544 struct ctl_table *table;
4545
4546 table = kmemdup(ipv6_route_table_template,
4547 sizeof(ipv6_route_table_template),
4548 GFP_KERNEL);
5ee09105
YH
4549
4550 if (table) {
4551 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 4552 table[0].extra1 = net;
86393e52 4553 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
4554 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
4555 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
4556 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
4557 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
4558 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
4559 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
4560 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 4561 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
4562
4563 /* Don't export sysctls to unprivileged users */
4564 if (net->user_ns != &init_user_ns)
4565 table[0].procname = NULL;
5ee09105
YH
4566 }
4567
760f2d01
DL
4568 return table;
4569}
1da177e4
LT
4570#endif
4571
2c8c1e72 4572static int __net_init ip6_route_net_init(struct net *net)
cdb18761 4573{
633d424b 4574 int ret = -ENOMEM;
8ed67789 4575
86393e52
AD
4576 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
4577 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 4578
fc66f95c
ED
4579 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
4580 goto out_ip6_dst_ops;
4581
8ed67789
DL
4582 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
4583 sizeof(*net->ipv6.ip6_null_entry),
4584 GFP_KERNEL);
4585 if (!net->ipv6.ip6_null_entry)
fc66f95c 4586 goto out_ip6_dst_entries;
d8d1f30b 4587 net->ipv6.ip6_null_entry->dst.path =
8ed67789 4588 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 4589 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4590 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
4591 ip6_template_metrics, true);
8ed67789
DL
4592
4593#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 4594 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
4595 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
4596 sizeof(*net->ipv6.ip6_prohibit_entry),
4597 GFP_KERNEL);
68fffc67
PZ
4598 if (!net->ipv6.ip6_prohibit_entry)
4599 goto out_ip6_null_entry;
d8d1f30b 4600 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 4601 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 4602 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4603 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
4604 ip6_template_metrics, true);
8ed67789
DL
4605
4606 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
4607 sizeof(*net->ipv6.ip6_blk_hole_entry),
4608 GFP_KERNEL);
68fffc67
PZ
4609 if (!net->ipv6.ip6_blk_hole_entry)
4610 goto out_ip6_prohibit_entry;
d8d1f30b 4611 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 4612 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 4613 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4614 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
4615 ip6_template_metrics, true);
8ed67789
DL
4616#endif
4617
b339a47c
PZ
4618 net->ipv6.sysctl.flush_delay = 0;
4619 net->ipv6.sysctl.ip6_rt_max_size = 4096;
4620 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
4621 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
4622 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
4623 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
4624 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
4625 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
4626
6891a346
BT
4627 net->ipv6.ip6_rt_gc_expire = 30*HZ;
4628
8ed67789
DL
4629 ret = 0;
4630out:
4631 return ret;
f2fc6a54 4632
68fffc67
PZ
4633#ifdef CONFIG_IPV6_MULTIPLE_TABLES
4634out_ip6_prohibit_entry:
4635 kfree(net->ipv6.ip6_prohibit_entry);
4636out_ip6_null_entry:
4637 kfree(net->ipv6.ip6_null_entry);
4638#endif
fc66f95c
ED
4639out_ip6_dst_entries:
4640 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 4641out_ip6_dst_ops:
f2fc6a54 4642 goto out;
cdb18761
DL
4643}
4644
2c8c1e72 4645static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 4646{
8ed67789
DL
4647 kfree(net->ipv6.ip6_null_entry);
4648#ifdef CONFIG_IPV6_MULTIPLE_TABLES
4649 kfree(net->ipv6.ip6_prohibit_entry);
4650 kfree(net->ipv6.ip6_blk_hole_entry);
4651#endif
41bb78b4 4652 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
4653}
4654
d189634e
TG
4655static int __net_init ip6_route_net_init_late(struct net *net)
4656{
4657#ifdef CONFIG_PROC_FS
d4beaa66
G
4658 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
4659 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
4660#endif
4661 return 0;
4662}
4663
4664static void __net_exit ip6_route_net_exit_late(struct net *net)
4665{
4666#ifdef CONFIG_PROC_FS
ece31ffd
G
4667 remove_proc_entry("ipv6_route", net->proc_net);
4668 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
4669#endif
4670}
4671
cdb18761
DL
4672static struct pernet_operations ip6_route_net_ops = {
4673 .init = ip6_route_net_init,
4674 .exit = ip6_route_net_exit,
4675};
4676
c3426b47
DM
4677static int __net_init ipv6_inetpeer_init(struct net *net)
4678{
4679 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
4680
4681 if (!bp)
4682 return -ENOMEM;
4683 inet_peer_base_init(bp);
4684 net->ipv6.peers = bp;
4685 return 0;
4686}
4687
4688static void __net_exit ipv6_inetpeer_exit(struct net *net)
4689{
4690 struct inet_peer_base *bp = net->ipv6.peers;
4691
4692 net->ipv6.peers = NULL;
56a6b248 4693 inetpeer_invalidate_tree(bp);
c3426b47
DM
4694 kfree(bp);
4695}
4696
2b823f72 4697static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
4698 .init = ipv6_inetpeer_init,
4699 .exit = ipv6_inetpeer_exit,
4700};
4701
d189634e
TG
4702static struct pernet_operations ip6_route_net_late_ops = {
4703 .init = ip6_route_net_init_late,
4704 .exit = ip6_route_net_exit_late,
4705};
4706
8ed67789
DL
4707static struct notifier_block ip6_route_dev_notifier = {
4708 .notifier_call = ip6_route_dev_notify,
242d3a49 4709 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
4710};
4711
2f460933
WC
4712void __init ip6_route_init_special_entries(void)
4713{
4714 /* Registering of the loopback is done before this portion of code,
4715 * the loopback reference in rt6_info will not be taken, do it
4716 * manually for init_net */
4717 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
4718 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4719 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4720 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
4721 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4722 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
4723 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4724 #endif
4725}
4726
433d49c3 4727int __init ip6_route_init(void)
1da177e4 4728{
433d49c3 4729 int ret;
8d0b94af 4730 int cpu;
433d49c3 4731
9a7ec3a9
DL
4732 ret = -ENOMEM;
4733 ip6_dst_ops_template.kmem_cachep =
e5d679f3 4734 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 4735 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 4736 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 4737 goto out;
14e50e57 4738
fc66f95c 4739 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 4740 if (ret)
bdb3289f 4741 goto out_kmem_cache;
bdb3289f 4742
c3426b47
DM
4743 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
4744 if (ret)
e8803b6c 4745 goto out_dst_entries;
2a0c451a 4746
7e52b33b
DM
4747 ret = register_pernet_subsys(&ip6_route_net_ops);
4748 if (ret)
4749 goto out_register_inetpeer;
c3426b47 4750
5dc121e9
AE
4751 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
4752
e8803b6c 4753 ret = fib6_init();
433d49c3 4754 if (ret)
8ed67789 4755 goto out_register_subsys;
433d49c3 4756
433d49c3
DL
4757 ret = xfrm6_init();
4758 if (ret)
e8803b6c 4759 goto out_fib6_init;
c35b7e72 4760
433d49c3
DL
4761 ret = fib6_rules_init();
4762 if (ret)
4763 goto xfrm6_init;
7e5449c2 4764
d189634e
TG
4765 ret = register_pernet_subsys(&ip6_route_net_late_ops);
4766 if (ret)
4767 goto fib6_rules_init;
4768
433d49c3 4769 ret = -ENOBUFS;
b97bac64
FW
4770 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) ||
4771 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) ||
e3a22b7f
FW
4772 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL,
4773 RTNL_FLAG_DOIT_UNLOCKED))
d189634e 4774 goto out_register_late_subsys;
c127ea2c 4775
8ed67789 4776 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 4777 if (ret)
d189634e 4778 goto out_register_late_subsys;
8ed67789 4779
8d0b94af
MKL
4780 for_each_possible_cpu(cpu) {
4781 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
4782
4783 INIT_LIST_HEAD(&ul->head);
4784 spin_lock_init(&ul->lock);
4785 }
4786
433d49c3
DL
4787out:
4788 return ret;
4789
d189634e
TG
4790out_register_late_subsys:
4791 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 4792fib6_rules_init:
433d49c3
DL
4793 fib6_rules_cleanup();
4794xfrm6_init:
433d49c3 4795 xfrm6_fini();
2a0c451a
TG
4796out_fib6_init:
4797 fib6_gc_cleanup();
8ed67789
DL
4798out_register_subsys:
4799 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
4800out_register_inetpeer:
4801 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
4802out_dst_entries:
4803 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 4804out_kmem_cache:
f2fc6a54 4805 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 4806 goto out;
1da177e4
LT
4807}
4808
4809void ip6_route_cleanup(void)
4810{
8ed67789 4811 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 4812 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 4813 fib6_rules_cleanup();
1da177e4 4814 xfrm6_fini();
1da177e4 4815 fib6_gc_cleanup();
c3426b47 4816 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 4817 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 4818 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 4819 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 4820}