]> git.ipfire.org Git - thirdparty/kernel/stable.git/blame - net/ipv6/route.c
net/ipv6: Defer initialization of dst to data path
[thirdparty/kernel/stable.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
b811580d 66#include <trace/events/fib6.h>
1da177e4 67
7c0f6ba6 68#include <linux/uaccess.h>
1da177e4
LT
69
70#ifdef CONFIG_SYSCTL
71#include <linux/sysctl.h>
72#endif
73
/*
 * Result of checking a route's next-hop neighbour. Negative values are
 * failures; rt6_score_route() passes them back to find_match() unchanged.
 */
enum rt6_nud_state {
	RT6_NUD_SUCCEED		= 1,
	RT6_NUD_FAIL_DO_RR	= -1,	/* find_match() scores it 0 and asks for round-robin */
	RT6_NUD_FAIL_PROBE	= -2,	/* neighbour entry is in NUD_FAILED */
	RT6_NUD_FAIL_HARD	= -3,	/* find_match() skips the route */
};
80
1da177e4 81static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 82static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 83static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
84static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85static void ip6_dst_destroy(struct dst_entry *);
86static void ip6_dst_ifdown(struct dst_entry *,
87 struct net_device *dev, int how);
569d3645 88static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
89
90static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 91static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 92static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 93static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 94static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
95static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 struct sk_buff *skb, u32 mtu);
97static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 struct sk_buff *skb);
4b32b5ad 99static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 100static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
16a16cd3
DA
101static size_t rt6_nlmsg_size(struct rt6_info *rt);
102static int rt6_fill_node(struct net *net,
103 struct sk_buff *skb, struct rt6_info *rt,
104 struct in6_addr *dst, struct in6_addr *src,
105 int iif, int type, u32 portid, u32 seq,
106 unsigned int flags);
35732d01
WW
107static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
108 struct in6_addr *daddr,
109 struct in6_addr *saddr);
1da177e4 110
70ceb4f5 111#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 112static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 113 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
114 const struct in6_addr *gwaddr,
115 struct net_device *dev,
95c96174 116 unsigned int pref);
efa2cea0 117static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 118 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
119 const struct in6_addr *gwaddr,
120 struct net_device *dev);
70ceb4f5
YH
121#endif
122
8d0b94af
MKL
123struct uncached_list {
124 spinlock_t lock;
125 struct list_head head;
126};
127
128static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
129
510c321b 130void rt6_uncached_list_add(struct rt6_info *rt)
8d0b94af
MKL
131{
132 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
133
8d0b94af
MKL
134 rt->rt6i_uncached_list = ul;
135
136 spin_lock_bh(&ul->lock);
137 list_add_tail(&rt->rt6i_uncached, &ul->head);
138 spin_unlock_bh(&ul->lock);
139}
140
510c321b 141void rt6_uncached_list_del(struct rt6_info *rt)
8d0b94af
MKL
142{
143 if (!list_empty(&rt->rt6i_uncached)) {
144 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 145 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
146
147 spin_lock_bh(&ul->lock);
148 list_del(&rt->rt6i_uncached);
81eb8447 149 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
150 spin_unlock_bh(&ul->lock);
151 }
152}
153
154static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
155{
156 struct net_device *loopback_dev = net->loopback_dev;
157 int cpu;
158
e332bc67
EB
159 if (dev == loopback_dev)
160 return;
161
8d0b94af
MKL
162 for_each_possible_cpu(cpu) {
163 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
164 struct rt6_info *rt;
165
166 spin_lock_bh(&ul->lock);
167 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
168 struct inet6_dev *rt_idev = rt->rt6i_idev;
169 struct net_device *rt_dev = rt->dst.dev;
170
e332bc67 171 if (rt_idev->dev == dev) {
8d0b94af
MKL
172 rt->rt6i_idev = in6_dev_get(loopback_dev);
173 in6_dev_put(rt_idev);
174 }
175
e332bc67 176 if (rt_dev == dev) {
8d0b94af
MKL
177 rt->dst.dev = loopback_dev;
178 dev_hold(rt->dst.dev);
179 dev_put(rt_dev);
180 }
181 }
182 spin_unlock_bh(&ul->lock);
183 }
184}
185
d52d3997
MKL
186static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
187{
3a2232e9 188 return dst_metrics_write_ptr(&rt->from->dst);
d52d3997
MKL
189}
190
06582540
DM
191static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
192{
4b32b5ad 193 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 194
d52d3997
MKL
195 if (rt->rt6i_flags & RTF_PCPU)
196 return rt6_pcpu_cow_metrics(rt);
197 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
198 return NULL;
199 else
3b471175 200 return dst_cow_metrics_generic(dst, old);
06582540
DM
201}
202
f894cbf8
DM
203static inline const void *choose_neigh_daddr(struct rt6_info *rt,
204 struct sk_buff *skb,
205 const void *daddr)
39232973
DM
206{
207 struct in6_addr *p = &rt->rt6i_gateway;
208
a7563f34 209 if (!ipv6_addr_any(p))
39232973 210 return (const void *) p;
f894cbf8
DM
211 else if (skb)
212 return &ipv6_hdr(skb)->daddr;
39232973
DM
213 return daddr;
214}
215
f894cbf8
DM
216static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
217 struct sk_buff *skb,
218 const void *daddr)
d3aaeb38 219{
39232973
DM
220 struct rt6_info *rt = (struct rt6_info *) dst;
221 struct neighbour *n;
222
f894cbf8 223 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 224 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
225 if (n)
226 return n;
227 return neigh_create(&nd_tbl, daddr, dst->dev);
228}
229
63fca65d
JA
230static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
231{
232 struct net_device *dev = dst->dev;
233 struct rt6_info *rt = (struct rt6_info *)dst;
234
235 daddr = choose_neigh_daddr(rt, NULL, daddr);
236 if (!daddr)
237 return;
238 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
239 return;
240 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
241 return;
242 __ipv6_confirm_neigh(dev, daddr);
243}
244
9a7ec3a9 245static struct dst_ops ip6_dst_ops_template = {
1da177e4 246 .family = AF_INET6,
1da177e4
LT
247 .gc = ip6_dst_gc,
248 .gc_thresh = 1024,
249 .check = ip6_dst_check,
0dbaee3b 250 .default_advmss = ip6_default_advmss,
ebb762f2 251 .mtu = ip6_mtu,
06582540 252 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
253 .destroy = ip6_dst_destroy,
254 .ifdown = ip6_dst_ifdown,
255 .negative_advice = ip6_negative_advice,
256 .link_failure = ip6_link_failure,
257 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 258 .redirect = rt6_do_redirect,
9f8955cc 259 .local_out = __ip6_local_out,
d3aaeb38 260 .neigh_lookup = ip6_neigh_lookup,
63fca65d 261 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
262};
263
ebb762f2 264static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 265{
618f9bc7
SK
266 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
267
268 return mtu ? : dst->dev->mtu;
ec831ea7
RD
269}
270
6700c270
DM
271static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
272 struct sk_buff *skb, u32 mtu)
14e50e57
DM
273{
274}
275
6700c270
DM
/* redirect handler for blackhole dst entries: deliberately does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* intentionally empty */
}
280
14e50e57
DM
281static struct dst_ops ip6_dst_blackhole_ops = {
282 .family = AF_INET6,
14e50e57
DM
283 .destroy = ip6_dst_destroy,
284 .check = ip6_dst_check,
ebb762f2 285 .mtu = ip6_blackhole_mtu,
214f45c9 286 .default_advmss = ip6_default_advmss,
14e50e57 287 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 288 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 289 .cow_metrics = dst_cow_metrics_generic,
d3aaeb38 290 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
291};
292
62fa8a84 293static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 294 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
295};
296
fb0af4c7 297static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
298 .dst = {
299 .__refcnt = ATOMIC_INIT(1),
300 .__use = 1,
2c20cbd7 301 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 302 .error = -ENETUNREACH,
d8d1f30b
CG
303 .input = ip6_pkt_discard,
304 .output = ip6_pkt_discard_out,
1da177e4
LT
305 },
306 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 307 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
308 .rt6i_metric = ~(u32) 0,
309 .rt6i_ref = ATOMIC_INIT(1),
e8478e80 310 .fib6_type = RTN_UNREACHABLE,
1da177e4
LT
311};
312
101367c2
TG
313#ifdef CONFIG_IPV6_MULTIPLE_TABLES
314
fb0af4c7 315static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
316 .dst = {
317 .__refcnt = ATOMIC_INIT(1),
318 .__use = 1,
2c20cbd7 319 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 320 .error = -EACCES,
d8d1f30b
CG
321 .input = ip6_pkt_prohibit,
322 .output = ip6_pkt_prohibit_out,
101367c2
TG
323 },
324 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 325 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
326 .rt6i_metric = ~(u32) 0,
327 .rt6i_ref = ATOMIC_INIT(1),
e8478e80 328 .fib6_type = RTN_PROHIBIT,
101367c2
TG
329};
330
fb0af4c7 331static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
332 .dst = {
333 .__refcnt = ATOMIC_INIT(1),
334 .__use = 1,
2c20cbd7 335 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 336 .error = -EINVAL,
d8d1f30b 337 .input = dst_discard,
ede2059d 338 .output = dst_discard_out,
101367c2
TG
339 },
340 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 341 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
342 .rt6i_metric = ~(u32) 0,
343 .rt6i_ref = ATOMIC_INIT(1),
e8478e80 344 .fib6_type = RTN_BLACKHOLE,
101367c2
TG
345};
346
347#endif
348
ebfa45f0
MKL
349static void rt6_info_init(struct rt6_info *rt)
350{
351 struct dst_entry *dst = &rt->dst;
352
353 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
354 INIT_LIST_HEAD(&rt->rt6i_siblings);
355 INIT_LIST_HEAD(&rt->rt6i_uncached);
356}
357
1da177e4 358/* allocate dst with ip6_dst_ops */
d52d3997
MKL
359static struct rt6_info *__ip6_dst_alloc(struct net *net,
360 struct net_device *dev,
ad706862 361 int flags)
1da177e4 362{
97bab73f 363 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 364 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 365
81eb8447 366 if (rt) {
ebfa45f0 367 rt6_info_init(rt);
81eb8447
WW
368 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
369 }
8104891b 370
cf911662 371 return rt;
1da177e4
LT
372}
373
9ab179d8
DA
374struct rt6_info *ip6_dst_alloc(struct net *net,
375 struct net_device *dev,
376 int flags)
d52d3997 377{
ad706862 378 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
379
380 if (rt) {
381 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
bfd8e5a4 382 if (!rt->rt6i_pcpu) {
587fea74 383 dst_release_immediate(&rt->dst);
d52d3997
MKL
384 return NULL;
385 }
386 }
387
388 return rt;
389}
9ab179d8 390EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 391
1da177e4
LT
392static void ip6_dst_destroy(struct dst_entry *dst)
393{
394 struct rt6_info *rt = (struct rt6_info *)dst;
35732d01 395 struct rt6_exception_bucket *bucket;
3a2232e9 396 struct rt6_info *from = rt->from;
8d0b94af 397 struct inet6_dev *idev;
1da177e4 398
4b32b5ad 399 dst_destroy_metrics_generic(dst);
87775312 400 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
401 rt6_uncached_list_del(rt);
402
403 idev = rt->rt6i_idev;
38308473 404 if (idev) {
1da177e4
LT
405 rt->rt6i_idev = NULL;
406 in6_dev_put(idev);
1ab1457c 407 }
35732d01
WW
408 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
409 if (bucket) {
410 rt->rt6i_exception_bucket = NULL;
411 kfree(bucket);
412 }
1716a961 413
3a2232e9
DM
414 rt->from = NULL;
415 dst_release(&from->dst);
b3419363
DM
416}
417
1da177e4
LT
418static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
419 int how)
420{
421 struct rt6_info *rt = (struct rt6_info *)dst;
422 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 423 struct net_device *loopback_dev =
c346dca1 424 dev_net(dev)->loopback_dev;
1da177e4 425
e5645f51
WW
426 if (idev && idev->dev != loopback_dev) {
427 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
428 if (loopback_idev) {
429 rt->rt6i_idev = loopback_idev;
430 in6_dev_put(idev);
97cac082 431 }
1da177e4
LT
432 }
433}
434
5973fb1e
MKL
435static bool __rt6_check_expired(const struct rt6_info *rt)
436{
437 if (rt->rt6i_flags & RTF_EXPIRES)
438 return time_after(jiffies, rt->dst.expires);
439 else
440 return false;
441}
442
a50feda5 443static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 444{
1716a961
G
445 if (rt->rt6i_flags & RTF_EXPIRES) {
446 if (time_after(jiffies, rt->dst.expires))
a50feda5 447 return true;
3a2232e9 448 } else if (rt->from) {
1e2ea8ad 449 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
3a2232e9 450 rt6_check_expired(rt->from);
1716a961 451 }
a50feda5 452 return false;
1da177e4
LT
453}
454
b4bac172
DA
455static struct rt6_info *rt6_multipath_select(const struct net *net,
456 struct rt6_info *match,
52bd4c0c 457 struct flowi6 *fl6, int oif,
b75cc8f9 458 const struct sk_buff *skb,
52bd4c0c 459 int strict)
51ebd318
ND
460{
461 struct rt6_info *sibling, *next_sibling;
51ebd318 462
b673d6cc
JS
463 /* We might have already computed the hash for ICMPv6 errors. In such
464 * case it will always be non-zero. Otherwise now is the time to do it.
465 */
466 if (!fl6->mp_hash)
b4bac172 467 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
b673d6cc 468
5e670d84 469 if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
3d709f69
IS
470 return match;
471
472 list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings,
473 rt6i_siblings) {
5e670d84
DA
474 int nh_upper_bound;
475
476 nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
477 if (fl6->mp_hash > nh_upper_bound)
3d709f69
IS
478 continue;
479 if (rt6_score_route(sibling, oif, strict) < 0)
480 break;
481 match = sibling;
482 break;
483 }
484
51ebd318
ND
485 return match;
486}
487
1da177e4 488/*
66f5d6ce 489 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
490 */
491
8ed67789
DL
492static inline struct rt6_info *rt6_device_match(struct net *net,
493 struct rt6_info *rt,
b71d1d42 494 const struct in6_addr *saddr,
1da177e4 495 int oif,
d420895e 496 int flags)
1da177e4
LT
497{
498 struct rt6_info *local = NULL;
499 struct rt6_info *sprt;
500
5e670d84
DA
501 if (!oif && ipv6_addr_any(saddr) &&
502 !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
8067bb8c 503 return rt;
dd3abc4e 504
071fb37e 505 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
5e670d84 506 const struct net_device *dev = sprt->fib6_nh.nh_dev;
dd3abc4e 507
5e670d84 508 if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
509 continue;
510
dd3abc4e 511 if (oif) {
1da177e4
LT
512 if (dev->ifindex == oif)
513 return sprt;
514 if (dev->flags & IFF_LOOPBACK) {
38308473 515 if (!sprt->rt6i_idev ||
1da177e4 516 sprt->rt6i_idev->dev->ifindex != oif) {
17fb0b2b 517 if (flags & RT6_LOOKUP_F_IFACE)
1da177e4 518 continue;
17fb0b2b
DA
519 if (local &&
520 local->rt6i_idev->dev->ifindex == oif)
1da177e4
LT
521 continue;
522 }
523 local = sprt;
524 }
dd3abc4e
YH
525 } else {
526 if (ipv6_chk_addr(net, saddr, dev,
527 flags & RT6_LOOKUP_F_IFACE))
528 return sprt;
1da177e4 529 }
dd3abc4e 530 }
1da177e4 531
dd3abc4e 532 if (oif) {
1da177e4
LT
533 if (local)
534 return local;
535
d420895e 536 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 537 return net->ipv6.ip6_null_entry;
1da177e4 538 }
8067bb8c 539
5e670d84 540 return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt;
1da177e4
LT
541}
542
27097255 543#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
544struct __rt6_probe_work {
545 struct work_struct work;
546 struct in6_addr target;
547 struct net_device *dev;
548};
549
550static void rt6_probe_deferred(struct work_struct *w)
551{
552 struct in6_addr mcaddr;
553 struct __rt6_probe_work *work =
554 container_of(w, struct __rt6_probe_work, work);
555
556 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 557 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 558 dev_put(work->dev);
662f5533 559 kfree(work);
c2f17e82
HFS
560}
561
27097255
YH
562static void rt6_probe(struct rt6_info *rt)
563{
990edb42 564 struct __rt6_probe_work *work;
5e670d84 565 const struct in6_addr *nh_gw;
f2c31e32 566 struct neighbour *neigh;
5e670d84
DA
567 struct net_device *dev;
568
27097255
YH
569 /*
570 * Okay, this does not seem to be appropriate
571 * for now, however, we need to check if it
572 * is really so; aka Router Reachability Probing.
573 *
574 * Router Reachability Probe MUST be rate-limited
575 * to no more than one per minute.
576 */
2152caea 577 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 578 return;
5e670d84
DA
579
580 nh_gw = &rt->fib6_nh.nh_gw;
581 dev = rt->fib6_nh.nh_dev;
2152caea 582 rcu_read_lock_bh();
5e670d84 583 neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
2152caea 584 if (neigh) {
8d6c31bf
MKL
585 if (neigh->nud_state & NUD_VALID)
586 goto out;
587
990edb42 588 work = NULL;
2152caea 589 write_lock(&neigh->lock);
990edb42
MKL
590 if (!(neigh->nud_state & NUD_VALID) &&
591 time_after(jiffies,
592 neigh->updated +
593 rt->rt6i_idev->cnf.rtr_probe_interval)) {
594 work = kmalloc(sizeof(*work), GFP_ATOMIC);
595 if (work)
596 __neigh_set_probe_once(neigh);
c2f17e82 597 }
2152caea 598 write_unlock(&neigh->lock);
990edb42
MKL
599 } else {
600 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 601 }
990edb42
MKL
602
603 if (work) {
604 INIT_WORK(&work->work, rt6_probe_deferred);
5e670d84
DA
605 work->target = *nh_gw;
606 dev_hold(dev);
607 work->dev = dev;
990edb42
MKL
608 schedule_work(&work->work);
609 }
610
8d6c31bf 611out:
2152caea 612 rcu_read_unlock_bh();
27097255
YH
613}
614#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* intentionally empty */
}
618#endif
619
1da177e4 620/*
554cfb7e 621 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 622 */
b6f99a21 623static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 624{
5e670d84
DA
625 const struct net_device *dev = rt->fib6_nh.nh_dev;
626
161980f4 627 if (!oif || dev->ifindex == oif)
554cfb7e 628 return 2;
161980f4
DM
629 if ((dev->flags & IFF_LOOPBACK) &&
630 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
631 return 1;
632 return 0;
554cfb7e 633}
1da177e4 634
afc154e9 635static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 636{
afc154e9 637 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
5e670d84 638 struct neighbour *neigh;
f2c31e32 639
4d0c5911
YH
640 if (rt->rt6i_flags & RTF_NONEXTHOP ||
641 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 642 return RT6_NUD_SUCCEED;
145a3621
YH
643
644 rcu_read_lock_bh();
5e670d84
DA
645 neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
646 &rt->fib6_nh.nh_gw);
145a3621
YH
647 if (neigh) {
648 read_lock(&neigh->lock);
554cfb7e 649 if (neigh->nud_state & NUD_VALID)
afc154e9 650 ret = RT6_NUD_SUCCEED;
398bcbeb 651#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 652 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 653 ret = RT6_NUD_SUCCEED;
7e980569
JB
654 else
655 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 656#endif
145a3621 657 read_unlock(&neigh->lock);
afc154e9
HFS
658 } else {
659 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 660 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 661 }
145a3621
YH
662 rcu_read_unlock_bh();
663
a5a81f0b 664 return ret;
1da177e4
LT
665}
666
554cfb7e
YH
667static int rt6_score_route(struct rt6_info *rt, int oif,
668 int strict)
1da177e4 669{
a5a81f0b 670 int m;
1ab1457c 671
4d0c5911 672 m = rt6_check_dev(rt, oif);
77d16f45 673 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 674 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
675#ifdef CONFIG_IPV6_ROUTER_PREF
676 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
677#endif
afc154e9
HFS
678 if (strict & RT6_LOOKUP_F_REACHABLE) {
679 int n = rt6_check_neigh(rt);
680 if (n < 0)
681 return n;
682 }
554cfb7e
YH
683 return m;
684}
685
f11e6659 686static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
687 int *mpri, struct rt6_info *match,
688 bool *do_rr)
554cfb7e 689{
f11e6659 690 int m;
afc154e9 691 bool match_do_rr = false;
35103d11 692 struct inet6_dev *idev = rt->rt6i_idev;
35103d11 693
5e670d84 694 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
695 goto out;
696
14c5206c 697 if (idev->cnf.ignore_routes_with_linkdown &&
5e670d84 698 rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 699 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 700 goto out;
f11e6659
DM
701
702 if (rt6_check_expired(rt))
703 goto out;
704
705 m = rt6_score_route(rt, oif, strict);
7e980569 706 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
707 match_do_rr = true;
708 m = 0; /* lowest valid score */
7e980569 709 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 710 goto out;
afc154e9
HFS
711 }
712
713 if (strict & RT6_LOOKUP_F_REACHABLE)
714 rt6_probe(rt);
f11e6659 715
7e980569 716 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 717 if (m > *mpri) {
afc154e9 718 *do_rr = match_do_rr;
f11e6659
DM
719 *mpri = m;
720 match = rt;
f11e6659 721 }
f11e6659
DM
722out:
723 return match;
724}
725
726static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
8d1040e8 727 struct rt6_info *leaf,
f11e6659 728 struct rt6_info *rr_head,
afc154e9
HFS
729 u32 metric, int oif, int strict,
730 bool *do_rr)
f11e6659 731{
9fbdcfaf 732 struct rt6_info *rt, *match, *cont;
554cfb7e 733 int mpri = -1;
1da177e4 734
f11e6659 735 match = NULL;
9fbdcfaf 736 cont = NULL;
071fb37e 737 for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
9fbdcfaf
SK
738 if (rt->rt6i_metric != metric) {
739 cont = rt;
740 break;
741 }
742
743 match = find_match(rt, oif, strict, &mpri, match, do_rr);
744 }
745
66f5d6ce 746 for (rt = leaf; rt && rt != rr_head;
071fb37e 747 rt = rcu_dereference(rt->rt6_next)) {
9fbdcfaf
SK
748 if (rt->rt6i_metric != metric) {
749 cont = rt;
750 break;
751 }
752
afc154e9 753 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
754 }
755
756 if (match || !cont)
757 return match;
758
071fb37e 759 for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))
afc154e9 760 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 761
f11e6659
DM
762 return match;
763}
1da177e4 764
8d1040e8
WW
765static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
766 int oif, int strict)
f11e6659 767{
66f5d6ce 768 struct rt6_info *leaf = rcu_dereference(fn->leaf);
f11e6659 769 struct rt6_info *match, *rt0;
afc154e9 770 bool do_rr = false;
17ecf590 771 int key_plen;
1da177e4 772
87b1af8d 773 if (!leaf || leaf == net->ipv6.ip6_null_entry)
8d1040e8
WW
774 return net->ipv6.ip6_null_entry;
775
66f5d6ce 776 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 777 if (!rt0)
66f5d6ce 778 rt0 = leaf;
1da177e4 779
17ecf590
WW
780 /* Double check to make sure fn is not an intermediate node
781 * and fn->leaf does not points to its child's leaf
782 * (This might happen if all routes under fn are deleted from
783 * the tree and fib6_repair_tree() is called on the node.)
784 */
785 key_plen = rt0->rt6i_dst.plen;
786#ifdef CONFIG_IPV6_SUBTREES
787 if (rt0->rt6i_src.plen)
788 key_plen = rt0->rt6i_src.plen;
789#endif
790 if (fn->fn_bit != key_plen)
791 return net->ipv6.ip6_null_entry;
792
8d1040e8 793 match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
afc154e9 794 &do_rr);
1da177e4 795
afc154e9 796 if (do_rr) {
071fb37e 797 struct rt6_info *next = rcu_dereference(rt0->rt6_next);
f11e6659 798
554cfb7e 799 /* no entries matched; do round-robin */
f11e6659 800 if (!next || next->rt6i_metric != rt0->rt6i_metric)
8d1040e8 801 next = leaf;
f11e6659 802
66f5d6ce
WW
803 if (next != rt0) {
804 spin_lock_bh(&leaf->rt6i_table->tb6_lock);
805 /* make sure next is not being deleted from the tree */
806 if (next->rt6i_node)
807 rcu_assign_pointer(fn->rr_ptr, next);
808 spin_unlock_bh(&leaf->rt6i_table->tb6_lock);
809 }
1da177e4 810 }
1da177e4 811
a02cec21 812 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
813}
814
8b9df265
MKL
815static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
816{
817 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
818}
819
70ceb4f5
YH
820#ifdef CONFIG_IPV6_ROUTE_INFO
821int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 822 const struct in6_addr *gwaddr)
70ceb4f5 823{
c346dca1 824 struct net *net = dev_net(dev);
70ceb4f5
YH
825 struct route_info *rinfo = (struct route_info *) opt;
826 struct in6_addr prefix_buf, *prefix;
827 unsigned int pref;
4bed72e4 828 unsigned long lifetime;
70ceb4f5
YH
829 struct rt6_info *rt;
830
831 if (len < sizeof(struct route_info)) {
832 return -EINVAL;
833 }
834
835 /* Sanity check for prefix_len and length */
836 if (rinfo->length > 3) {
837 return -EINVAL;
838 } else if (rinfo->prefix_len > 128) {
839 return -EINVAL;
840 } else if (rinfo->prefix_len > 64) {
841 if (rinfo->length < 2) {
842 return -EINVAL;
843 }
844 } else if (rinfo->prefix_len > 0) {
845 if (rinfo->length < 1) {
846 return -EINVAL;
847 }
848 }
849
850 pref = rinfo->route_pref;
851 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 852 return -EINVAL;
70ceb4f5 853
4bed72e4 854 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
855
856 if (rinfo->length == 3)
857 prefix = (struct in6_addr *)rinfo->prefix;
858 else {
859 /* this function is safe */
860 ipv6_addr_prefix(&prefix_buf,
861 (struct in6_addr *)rinfo->prefix,
862 rinfo->prefix_len);
863 prefix = &prefix_buf;
864 }
865
f104a567 866 if (rinfo->prefix_len == 0)
afb1d4b5 867 rt = rt6_get_dflt_router(net, gwaddr, dev);
f104a567
DJ
868 else
869 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 870 gwaddr, dev);
70ceb4f5
YH
871
872 if (rt && !lifetime) {
afb1d4b5 873 ip6_del_rt(net, rt);
70ceb4f5
YH
874 rt = NULL;
875 }
876
877 if (!rt && lifetime)
830218c1
DA
878 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
879 dev, pref);
70ceb4f5
YH
880 else if (rt)
881 rt->rt6i_flags = RTF_ROUTEINFO |
882 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
883
884 if (rt) {
1716a961
G
885 if (!addrconf_finite_timeout(lifetime))
886 rt6_clean_expires(rt);
887 else
888 rt6_set_expires(rt, jiffies + HZ * lifetime);
889
94e187c0 890 ip6_rt_put(rt);
70ceb4f5
YH
891 }
892 return 0;
893}
894#endif
895
ae90d867
DA
896/*
897 * Misc support functions
898 */
899
900/* called with rcu_lock held */
901static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
902{
5e670d84 903 struct net_device *dev = rt->fib6_nh.nh_dev;
ae90d867
DA
904
905 if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
906 /* for copies of local routes, dst->dev needs to be the
907 * device if it is a master device, the master device if
908 * device is enslaved, and the loopback as the default
909 */
910 if (netif_is_l3_slave(dev) &&
911 !rt6_need_strict(&rt->rt6i_dst.addr))
912 dev = l3mdev_master_dev_rcu(dev);
913 else if (!netif_is_l3_master(dev))
914 dev = dev_net(dev)->loopback_dev;
915 /* last case is netif_is_l3_master(dev) is true in which
916 * case we want dev returned to be dev
917 */
918 }
919
920 return dev;
921}
922
6edb3c96
DA
923static const int fib6_prop[RTN_MAX + 1] = {
924 [RTN_UNSPEC] = 0,
925 [RTN_UNICAST] = 0,
926 [RTN_LOCAL] = 0,
927 [RTN_BROADCAST] = 0,
928 [RTN_ANYCAST] = 0,
929 [RTN_MULTICAST] = 0,
930 [RTN_BLACKHOLE] = -EINVAL,
931 [RTN_UNREACHABLE] = -EHOSTUNREACH,
932 [RTN_PROHIBIT] = -EACCES,
933 [RTN_THROW] = -EAGAIN,
934 [RTN_NAT] = -EINVAL,
935 [RTN_XRESOLVE] = -EINVAL,
936};
937
938static int ip6_rt_type_to_error(u8 fib6_type)
939{
940 return fib6_prop[fib6_type];
941}
942
943static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct rt6_info *ort)
944{
945 rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
946
947 switch (ort->fib6_type) {
948 case RTN_BLACKHOLE:
949 rt->dst.output = dst_discard_out;
950 rt->dst.input = dst_discard;
951 break;
952 case RTN_PROHIBIT:
953 rt->dst.output = ip6_pkt_prohibit_out;
954 rt->dst.input = ip6_pkt_prohibit;
955 break;
956 case RTN_THROW:
957 case RTN_UNREACHABLE:
958 default:
959 rt->dst.output = ip6_pkt_discard_out;
960 rt->dst.input = ip6_pkt_discard;
961 break;
962 }
963}
964
965static void ip6_rt_init_dst(struct rt6_info *rt, struct rt6_info *ort)
966{
967 if (ort->rt6i_flags & RTF_REJECT) {
968 ip6_rt_init_dst_reject(rt, ort);
969 return;
970 }
971
972 rt->dst.error = 0;
973 rt->dst.output = ip6_output;
974
975 if (ort->fib6_type == RTN_LOCAL) {
976 rt->dst.flags |= DST_HOST;
977 rt->dst.input = ip6_input;
978 } else if (ipv6_addr_type(&ort->rt6i_dst.addr) & IPV6_ADDR_MULTICAST) {
979 rt->dst.input = ip6_mc_input;
980 } else {
981 rt->dst.input = ip6_forward;
982 }
983
984 if (ort->fib6_nh.nh_lwtstate) {
985 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
986 lwtunnel_set_redirect(&rt->dst);
987 }
988
989 rt->dst.lastuse = jiffies;
990}
991
ae90d867
DA
992static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
993{
994 BUG_ON(from->from);
995
996 rt->rt6i_flags &= ~RTF_EXPIRES;
997 dst_hold(&from->dst);
998 rt->from = from;
999 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
1000}
1001
1002static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
1003{
6edb3c96
DA
1004 ip6_rt_init_dst(rt, ort);
1005
ae90d867 1006 rt->rt6i_dst = ort->rt6i_dst;
ae90d867
DA
1007 rt->rt6i_idev = ort->rt6i_idev;
1008 if (rt->rt6i_idev)
1009 in6_dev_hold(rt->rt6i_idev);
5e670d84 1010 rt->rt6i_gateway = ort->fib6_nh.nh_gw;
ae90d867
DA
1011 rt->rt6i_flags = ort->rt6i_flags;
1012 rt6_set_from(rt, ort);
1013 rt->rt6i_metric = ort->rt6i_metric;
1014#ifdef CONFIG_IPV6_SUBTREES
1015 rt->rt6i_src = ort->rt6i_src;
1016#endif
1017 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
1018 rt->rt6i_table = ort->rt6i_table;
5e670d84 1019 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
ae90d867
DA
1020}
1021
a3c00e46
MKL
1022static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1023 struct in6_addr *saddr)
1024{
66f5d6ce 1025 struct fib6_node *pn, *sn;
a3c00e46
MKL
1026 while (1) {
1027 if (fn->fn_flags & RTN_TL_ROOT)
1028 return NULL;
66f5d6ce
WW
1029 pn = rcu_dereference(fn->parent);
1030 sn = FIB6_SUBTREE(pn);
1031 if (sn && sn != fn)
1032 fn = fib6_lookup(sn, NULL, saddr);
a3c00e46
MKL
1033 else
1034 fn = pn;
1035 if (fn->fn_flags & RTN_RTINFO)
1036 return fn;
1037 }
1038}
c71099ac 1039
d3843fe5
WW
1040static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
1041 bool null_fallback)
1042{
1043 struct rt6_info *rt = *prt;
1044
1045 if (dst_hold_safe(&rt->dst))
1046 return true;
1047 if (null_fallback) {
1048 rt = net->ipv6.ip6_null_entry;
1049 dst_hold(&rt->dst);
1050 } else {
1051 rt = NULL;
1052 }
1053 *prt = rt;
1054 return false;
1055}
1056
8ed67789
DL
1057static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1058 struct fib6_table *table,
b75cc8f9
DA
1059 struct flowi6 *fl6,
1060 const struct sk_buff *skb,
1061 int flags)
1da177e4 1062{
2b760fcf 1063 struct rt6_info *rt, *rt_cache;
1da177e4 1064 struct fib6_node *fn;
1da177e4 1065
b6cdbc85
DA
1066 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1067 flags &= ~RT6_LOOKUP_F_IFACE;
1068
66f5d6ce 1069 rcu_read_lock();
4c9483b2 1070 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 1071restart:
66f5d6ce
WW
1072 rt = rcu_dereference(fn->leaf);
1073 if (!rt) {
1074 rt = net->ipv6.ip6_null_entry;
1075 } else {
1076 rt = rt6_device_match(net, rt, &fl6->saddr,
1077 fl6->flowi6_oif, flags);
1078 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
b4bac172 1079 rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif,
b75cc8f9 1080 skb, flags);
66f5d6ce 1081 }
a3c00e46
MKL
1082 if (rt == net->ipv6.ip6_null_entry) {
1083 fn = fib6_backtrack(fn, &fl6->saddr);
1084 if (fn)
1085 goto restart;
1086 }
2b760fcf
WW
1087 /* Search through exception table */
1088 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
1089 if (rt_cache)
1090 rt = rt_cache;
1091
d3843fe5
WW
1092 if (ip6_hold_safe(net, &rt, true))
1093 dst_use_noref(&rt->dst, jiffies);
1094
66f5d6ce 1095 rcu_read_unlock();
b811580d 1096
b65f164d 1097 trace_fib6_table_lookup(net, rt, table, fl6);
b811580d 1098
c71099ac
TG
1099 return rt;
1100
1101}
1102
67ba4152 1103struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 1104 const struct sk_buff *skb, int flags)
ea6e574e 1105{
b75cc8f9 1106 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
ea6e574e
FW
1107}
1108EXPORT_SYMBOL_GPL(ip6_route_lookup);
1109
9acd9f3a 1110struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
b75cc8f9
DA
1111 const struct in6_addr *saddr, int oif,
1112 const struct sk_buff *skb, int strict)
c71099ac 1113{
4c9483b2
DM
1114 struct flowi6 fl6 = {
1115 .flowi6_oif = oif,
1116 .daddr = *daddr,
c71099ac
TG
1117 };
1118 struct dst_entry *dst;
77d16f45 1119 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 1120
adaa70bb 1121 if (saddr) {
4c9483b2 1122 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
1123 flags |= RT6_LOOKUP_F_HAS_SADDR;
1124 }
1125
b75cc8f9 1126 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
c71099ac
TG
1127 if (dst->error == 0)
1128 return (struct rt6_info *) dst;
1129
1130 dst_release(dst);
1131
1da177e4
LT
1132 return NULL;
1133}
7159039a
YH
1134EXPORT_SYMBOL(rt6_lookup);
1135
c71099ac 1136/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
1137 * It takes new route entry, the addition fails by any reason the
1138 * route is released.
1139 * Caller must hold dst before calling it.
1da177e4
LT
1140 */
1141
e5fd387a 1142static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
333c4301
DA
1143 struct mx6_config *mxc,
1144 struct netlink_ext_ack *extack)
1da177e4
LT
1145{
1146 int err;
c71099ac 1147 struct fib6_table *table;
1da177e4 1148
c71099ac 1149 table = rt->rt6i_table;
66f5d6ce 1150 spin_lock_bh(&table->tb6_lock);
333c4301 1151 err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
66f5d6ce 1152 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1153
1154 return err;
1155}
1156
afb1d4b5 1157int ip6_ins_rt(struct net *net, struct rt6_info *rt)
40e22e8f 1158{
afb1d4b5 1159 struct nl_info info = { .nl_net = net, };
e715b6d3
FW
1160 struct mx6_config mxc = { .mx = NULL, };
1161
1cfb71ee
WW
1162 /* Hold dst to account for the reference from the fib6 tree */
1163 dst_hold(&rt->dst);
333c4301 1164 return __ip6_ins_rt(rt, &info, &mxc, NULL);
40e22e8f
TG
1165}
1166
8b9df265
MKL
1167static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
1168 const struct in6_addr *daddr,
1169 const struct in6_addr *saddr)
1da177e4 1170{
4832c30d 1171 struct net_device *dev;
1da177e4
LT
1172 struct rt6_info *rt;
1173
1174 /*
1175 * Clone the route.
1176 */
1177
d52d3997 1178 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
3a2232e9 1179 ort = ort->from;
1da177e4 1180
4832c30d
DA
1181 rcu_read_lock();
1182 dev = ip6_rt_get_dev_rcu(ort);
1183 rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
1184 rcu_read_unlock();
83a09abd
MKL
1185 if (!rt)
1186 return NULL;
1187
1188 ip6_rt_copy_init(rt, ort);
1189 rt->rt6i_flags |= RTF_CACHE;
1190 rt->rt6i_metric = 0;
1191 rt->dst.flags |= DST_HOST;
1192 rt->rt6i_dst.addr = *daddr;
1193 rt->rt6i_dst.plen = 128;
1da177e4 1194
83a09abd
MKL
1195 if (!rt6_is_gw_or_nonexthop(ort)) {
1196 if (ort->rt6i_dst.plen != 128 &&
1197 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
1198 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1199#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1200 if (rt->rt6i_src.plen && saddr) {
1201 rt->rt6i_src.addr = *saddr;
1202 rt->rt6i_src.plen = 128;
8b9df265 1203 }
83a09abd 1204#endif
95a9a5ba 1205 }
1da177e4 1206
95a9a5ba
YH
1207 return rt;
1208}
1da177e4 1209
d52d3997
MKL
1210static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1211{
4832c30d 1212 struct net_device *dev;
d52d3997
MKL
1213 struct rt6_info *pcpu_rt;
1214
4832c30d
DA
1215 rcu_read_lock();
1216 dev = ip6_rt_get_dev_rcu(rt);
1217 pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
1218 rcu_read_unlock();
d52d3997
MKL
1219 if (!pcpu_rt)
1220 return NULL;
1221 ip6_rt_copy_init(pcpu_rt, rt);
1222 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1223 pcpu_rt->rt6i_flags |= RTF_PCPU;
1224 return pcpu_rt;
1225}
1226
66f5d6ce 1227/* It should be called with rcu_read_lock() acquired */
d52d3997
MKL
1228static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1229{
a73e4195 1230 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1231
1232 p = this_cpu_ptr(rt->rt6i_pcpu);
1233 pcpu_rt = *p;
1234
d3843fe5 1235 if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false))
a73e4195 1236 rt6_dst_from_metrics_check(pcpu_rt);
d3843fe5 1237
a73e4195
MKL
1238 return pcpu_rt;
1239}
1240
afb1d4b5
DA
1241static struct rt6_info *rt6_make_pcpu_route(struct net *net,
1242 struct rt6_info *rt)
a73e4195
MKL
1243{
1244 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1245
1246 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1247 if (!pcpu_rt) {
9c7370a1
MKL
1248 dst_hold(&net->ipv6.ip6_null_entry->dst);
1249 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1250 }
1251
a94b9367
WW
1252 dst_hold(&pcpu_rt->dst);
1253 p = this_cpu_ptr(rt->rt6i_pcpu);
1254 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1255 BUG_ON(prev);
a94b9367 1256
d52d3997
MKL
1257 rt6_dst_from_metrics_check(pcpu_rt);
1258 return pcpu_rt;
1259}
1260
35732d01
WW
/* Exception hash table implementation.
 *
 * rt6_exception_lock is a single file-scope spinlock.  The helpers in this
 * file that insert, remove or rewrite exception entries either take it
 * themselves or state in their header comment that the caller must hold it.
 */
static DEFINE_SPINLOCK(rt6_exception_lock);
1264
1265/* Remove rt6_ex from hash table and free the memory
1266 * Caller must hold rt6_exception_lock
1267 */
1268static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1269 struct rt6_exception *rt6_ex)
1270{
b2427e67 1271 struct net *net;
81eb8447 1272
35732d01
WW
1273 if (!bucket || !rt6_ex)
1274 return;
b2427e67
CIK
1275
1276 net = dev_net(rt6_ex->rt6i->dst.dev);
35732d01
WW
1277 rt6_ex->rt6i->rt6i_node = NULL;
1278 hlist_del_rcu(&rt6_ex->hlist);
1279 rt6_release(rt6_ex->rt6i);
1280 kfree_rcu(rt6_ex, rcu);
1281 WARN_ON_ONCE(!bucket->depth);
1282 bucket->depth--;
81eb8447 1283 net->ipv6.rt6_stats->fib_rt_cache--;
35732d01
WW
1284}
1285
1286/* Remove oldest rt6_ex in bucket and free the memory
1287 * Caller must hold rt6_exception_lock
1288 */
1289static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1290{
1291 struct rt6_exception *rt6_ex, *oldest = NULL;
1292
1293 if (!bucket)
1294 return;
1295
1296 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1297 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1298 oldest = rt6_ex;
1299 }
1300 rt6_remove_exception(bucket, oldest);
1301}
1302
1303static u32 rt6_exception_hash(const struct in6_addr *dst,
1304 const struct in6_addr *src)
1305{
1306 static u32 seed __read_mostly;
1307 u32 val;
1308
1309 net_get_random_once(&seed, sizeof(seed));
1310 val = jhash(dst, sizeof(*dst), seed);
1311
1312#ifdef CONFIG_IPV6_SUBTREES
1313 if (src)
1314 val = jhash(src, sizeof(*src), val);
1315#endif
1316 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1317}
1318
1319/* Helper function to find the cached rt in the hash table
1320 * and update bucket pointer to point to the bucket for this
1321 * (daddr, saddr) pair
1322 * Caller must hold rt6_exception_lock
1323 */
1324static struct rt6_exception *
1325__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1326 const struct in6_addr *daddr,
1327 const struct in6_addr *saddr)
1328{
1329 struct rt6_exception *rt6_ex;
1330 u32 hval;
1331
1332 if (!(*bucket) || !daddr)
1333 return NULL;
1334
1335 hval = rt6_exception_hash(daddr, saddr);
1336 *bucket += hval;
1337
1338 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1339 struct rt6_info *rt6 = rt6_ex->rt6i;
1340 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1341
1342#ifdef CONFIG_IPV6_SUBTREES
1343 if (matched && saddr)
1344 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1345#endif
1346 if (matched)
1347 return rt6_ex;
1348 }
1349 return NULL;
1350}
1351
1352/* Helper function to find the cached rt in the hash table
1353 * and update bucket pointer to point to the bucket for this
1354 * (daddr, saddr) pair
1355 * Caller must hold rcu_read_lock()
1356 */
1357static struct rt6_exception *
1358__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1359 const struct in6_addr *daddr,
1360 const struct in6_addr *saddr)
1361{
1362 struct rt6_exception *rt6_ex;
1363 u32 hval;
1364
1365 WARN_ON_ONCE(!rcu_read_lock_held());
1366
1367 if (!(*bucket) || !daddr)
1368 return NULL;
1369
1370 hval = rt6_exception_hash(daddr, saddr);
1371 *bucket += hval;
1372
1373 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1374 struct rt6_info *rt6 = rt6_ex->rt6i;
1375 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1376
1377#ifdef CONFIG_IPV6_SUBTREES
1378 if (matched && saddr)
1379 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1380#endif
1381 if (matched)
1382 return rt6_ex;
1383 }
1384 return NULL;
1385}
1386
1387static int rt6_insert_exception(struct rt6_info *nrt,
1388 struct rt6_info *ort)
1389{
5e670d84 1390 struct net *net = dev_net(nrt->dst.dev);
35732d01
WW
1391 struct rt6_exception_bucket *bucket;
1392 struct in6_addr *src_key = NULL;
1393 struct rt6_exception *rt6_ex;
1394 int err = 0;
1395
1396 /* ort can't be a cache or pcpu route */
1397 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
3a2232e9 1398 ort = ort->from;
35732d01
WW
1399 WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
1400
1401 spin_lock_bh(&rt6_exception_lock);
1402
1403 if (ort->exception_bucket_flushed) {
1404 err = -EINVAL;
1405 goto out;
1406 }
1407
1408 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1409 lockdep_is_held(&rt6_exception_lock));
1410 if (!bucket) {
1411 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1412 GFP_ATOMIC);
1413 if (!bucket) {
1414 err = -ENOMEM;
1415 goto out;
1416 }
1417 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1418 }
1419
1420#ifdef CONFIG_IPV6_SUBTREES
1421 /* rt6i_src.plen != 0 indicates ort is in subtree
1422 * and exception table is indexed by a hash of
1423 * both rt6i_dst and rt6i_src.
1424 * Otherwise, the exception table is indexed by
1425 * a hash of only rt6i_dst.
1426 */
1427 if (ort->rt6i_src.plen)
1428 src_key = &nrt->rt6i_src.addr;
1429#endif
60006a48
WW
1430
1431 /* Update rt6i_prefsrc as it could be changed
1432 * in rt6_remove_prefsrc()
1433 */
1434 nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
f5bbe7ee
WW
1435 /* rt6_mtu_change() might lower mtu on ort.
1436 * Only insert this exception route if its mtu
1437 * is less than ort's mtu value.
1438 */
1439 if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) {
1440 err = -EINVAL;
1441 goto out;
1442 }
60006a48 1443
35732d01
WW
1444 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1445 src_key);
1446 if (rt6_ex)
1447 rt6_remove_exception(bucket, rt6_ex);
1448
1449 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1450 if (!rt6_ex) {
1451 err = -ENOMEM;
1452 goto out;
1453 }
1454 rt6_ex->rt6i = nrt;
1455 rt6_ex->stamp = jiffies;
1456 atomic_inc(&nrt->rt6i_ref);
1457 nrt->rt6i_node = ort->rt6i_node;
1458 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1459 bucket->depth++;
81eb8447 1460 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1461
1462 if (bucket->depth > FIB6_MAX_DEPTH)
1463 rt6_exception_remove_oldest(bucket);
1464
1465out:
1466 spin_unlock_bh(&rt6_exception_lock);
1467
1468 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1469 if (!err) {
922c2ac8 1470 spin_lock_bh(&ort->rt6i_table->tb6_lock);
7aef6859 1471 fib6_update_sernum(net, ort);
922c2ac8 1472 spin_unlock_bh(&ort->rt6i_table->tb6_lock);
b886d5f2
PA
1473 fib6_force_start_gc(net);
1474 }
35732d01
WW
1475
1476 return err;
1477}
1478
1479void rt6_flush_exceptions(struct rt6_info *rt)
1480{
1481 struct rt6_exception_bucket *bucket;
1482 struct rt6_exception *rt6_ex;
1483 struct hlist_node *tmp;
1484 int i;
1485
1486 spin_lock_bh(&rt6_exception_lock);
1487 /* Prevent rt6_insert_exception() to recreate the bucket list */
1488 rt->exception_bucket_flushed = 1;
1489
1490 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1491 lockdep_is_held(&rt6_exception_lock));
1492 if (!bucket)
1493 goto out;
1494
1495 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1496 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1497 rt6_remove_exception(bucket, rt6_ex);
1498 WARN_ON_ONCE(bucket->depth);
1499 bucket++;
1500 }
1501
1502out:
1503 spin_unlock_bh(&rt6_exception_lock);
1504}
1505
1506/* Find cached rt in the hash table inside passed in rt
1507 * Caller has to hold rcu_read_lock()
1508 */
1509static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
1510 struct in6_addr *daddr,
1511 struct in6_addr *saddr)
1512{
1513 struct rt6_exception_bucket *bucket;
1514 struct in6_addr *src_key = NULL;
1515 struct rt6_exception *rt6_ex;
1516 struct rt6_info *res = NULL;
1517
1518 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1519
1520#ifdef CONFIG_IPV6_SUBTREES
1521 /* rt6i_src.plen != 0 indicates rt is in subtree
1522 * and exception table is indexed by a hash of
1523 * both rt6i_dst and rt6i_src.
1524 * Otherwise, the exception table is indexed by
1525 * a hash of only rt6i_dst.
1526 */
1527 if (rt->rt6i_src.plen)
1528 src_key = saddr;
1529#endif
1530 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1531
1532 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1533 res = rt6_ex->rt6i;
1534
1535 return res;
1536}
1537
1538/* Remove the passed in cached rt from the hash table that contains it */
1539int rt6_remove_exception_rt(struct rt6_info *rt)
1540{
35732d01 1541 struct rt6_exception_bucket *bucket;
3a2232e9 1542 struct rt6_info *from = rt->from;
35732d01
WW
1543 struct in6_addr *src_key = NULL;
1544 struct rt6_exception *rt6_ex;
1545 int err;
1546
1547 if (!from ||
442d713b 1548 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1549 return -EINVAL;
1550
1551 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1552 return -ENOENT;
1553
1554 spin_lock_bh(&rt6_exception_lock);
1555 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1556 lockdep_is_held(&rt6_exception_lock));
1557#ifdef CONFIG_IPV6_SUBTREES
1558 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1559 * and exception table is indexed by a hash of
1560 * both rt6i_dst and rt6i_src.
1561 * Otherwise, the exception table is indexed by
1562 * a hash of only rt6i_dst.
1563 */
1564 if (from->rt6i_src.plen)
1565 src_key = &rt->rt6i_src.addr;
1566#endif
1567 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1568 &rt->rt6i_dst.addr,
1569 src_key);
1570 if (rt6_ex) {
1571 rt6_remove_exception(bucket, rt6_ex);
1572 err = 0;
1573 } else {
1574 err = -ENOENT;
1575 }
1576
1577 spin_unlock_bh(&rt6_exception_lock);
1578 return err;
1579}
1580
1581/* Find rt6_ex which contains the passed in rt cache and
1582 * refresh its stamp
1583 */
1584static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1585{
35732d01 1586 struct rt6_exception_bucket *bucket;
3a2232e9 1587 struct rt6_info *from = rt->from;
35732d01
WW
1588 struct in6_addr *src_key = NULL;
1589 struct rt6_exception *rt6_ex;
1590
1591 if (!from ||
442d713b 1592 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1593 return;
1594
1595 rcu_read_lock();
1596 bucket = rcu_dereference(from->rt6i_exception_bucket);
1597
1598#ifdef CONFIG_IPV6_SUBTREES
1599 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1600 * and exception table is indexed by a hash of
1601 * both rt6i_dst and rt6i_src.
1602 * Otherwise, the exception table is indexed by
1603 * a hash of only rt6i_dst.
1604 */
1605 if (from->rt6i_src.plen)
1606 src_key = &rt->rt6i_src.addr;
1607#endif
1608 rt6_ex = __rt6_find_exception_rcu(&bucket,
1609 &rt->rt6i_dst.addr,
1610 src_key);
1611 if (rt6_ex)
1612 rt6_ex->stamp = jiffies;
1613
1614 rcu_read_unlock();
1615}
1616
60006a48
WW
1617static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
1618{
1619 struct rt6_exception_bucket *bucket;
1620 struct rt6_exception *rt6_ex;
1621 int i;
1622
1623 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1624 lockdep_is_held(&rt6_exception_lock));
1625
1626 if (bucket) {
1627 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1628 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1629 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1630 }
1631 bucket++;
1632 }
1633 }
1634}
1635
e9fa1495
SB
1636static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1637 struct rt6_info *rt, int mtu)
1638{
1639 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1640 * lowest MTU in the path: always allow updating the route PMTU to
1641 * reflect PMTU decreases.
1642 *
1643 * If the new MTU is higher, and the route PMTU is equal to the local
1644 * MTU, this means the old MTU is the lowest in the path, so allow
1645 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1646 * handle this.
1647 */
1648
1649 if (dst_mtu(&rt->dst) >= mtu)
1650 return true;
1651
1652 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1653 return true;
1654
1655 return false;
1656}
1657
1658static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
1659 struct rt6_info *rt, int mtu)
f5bbe7ee
WW
1660{
1661 struct rt6_exception_bucket *bucket;
1662 struct rt6_exception *rt6_ex;
1663 int i;
1664
1665 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1666 lockdep_is_held(&rt6_exception_lock));
1667
e9fa1495
SB
1668 if (!bucket)
1669 return;
1670
1671 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1672 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1673 struct rt6_info *entry = rt6_ex->rt6i;
1674
1675 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
1676 * route), the metrics of its rt->dst.from have already
1677 * been updated.
1678 */
1679 if (entry->rt6i_pmtu &&
1680 rt6_mtu_change_route_allowed(idev, entry, mtu))
1681 entry->rt6i_pmtu = mtu;
f5bbe7ee 1682 }
e9fa1495 1683 bucket++;
f5bbe7ee
WW
1684 }
1685}
1686
b16cb459
WW
1687#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1688
1689static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
1690 struct in6_addr *gateway)
1691{
1692 struct rt6_exception_bucket *bucket;
1693 struct rt6_exception *rt6_ex;
1694 struct hlist_node *tmp;
1695 int i;
1696
1697 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1698 return;
1699
1700 spin_lock_bh(&rt6_exception_lock);
1701 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1702 lockdep_is_held(&rt6_exception_lock));
1703
1704 if (bucket) {
1705 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1706 hlist_for_each_entry_safe(rt6_ex, tmp,
1707 &bucket->chain, hlist) {
1708 struct rt6_info *entry = rt6_ex->rt6i;
1709
1710 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1711 RTF_CACHE_GATEWAY &&
1712 ipv6_addr_equal(gateway,
1713 &entry->rt6i_gateway)) {
1714 rt6_remove_exception(bucket, rt6_ex);
1715 }
1716 }
1717 bucket++;
1718 }
1719 }
1720
1721 spin_unlock_bh(&rt6_exception_lock);
1722}
1723
c757faa8
WW
1724static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1725 struct rt6_exception *rt6_ex,
1726 struct fib6_gc_args *gc_args,
1727 unsigned long now)
1728{
1729 struct rt6_info *rt = rt6_ex->rt6i;
1730
1859bac0
PA
1731 /* we are pruning and obsoleting aged-out and non gateway exceptions
1732 * even if others have still references to them, so that on next
1733 * dst_check() such references can be dropped.
1734 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1735 * expired, independently from their aging, as per RFC 8201 section 4
1736 */
31afeb42
WW
1737 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1738 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1739 RT6_TRACE("aging clone %p\n", rt);
1740 rt6_remove_exception(bucket, rt6_ex);
1741 return;
1742 }
1743 } else if (time_after(jiffies, rt->dst.expires)) {
1744 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1745 rt6_remove_exception(bucket, rt6_ex);
1746 return;
31afeb42
WW
1747 }
1748
1749 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1750 struct neighbour *neigh;
1751 __u8 neigh_flags = 0;
1752
1bfa26ff
ED
1753 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1754 if (neigh)
c757faa8 1755 neigh_flags = neigh->flags;
1bfa26ff 1756
c757faa8
WW
1757 if (!(neigh_flags & NTF_ROUTER)) {
1758 RT6_TRACE("purging route %p via non-router but gateway\n",
1759 rt);
1760 rt6_remove_exception(bucket, rt6_ex);
1761 return;
1762 }
1763 }
31afeb42 1764
c757faa8
WW
1765 gc_args->more++;
1766}
1767
1768void rt6_age_exceptions(struct rt6_info *rt,
1769 struct fib6_gc_args *gc_args,
1770 unsigned long now)
1771{
1772 struct rt6_exception_bucket *bucket;
1773 struct rt6_exception *rt6_ex;
1774 struct hlist_node *tmp;
1775 int i;
1776
1777 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1778 return;
1779
1bfa26ff
ED
1780 rcu_read_lock_bh();
1781 spin_lock(&rt6_exception_lock);
c757faa8
WW
1782 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1783 lockdep_is_held(&rt6_exception_lock));
1784
1785 if (bucket) {
1786 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1787 hlist_for_each_entry_safe(rt6_ex, tmp,
1788 &bucket->chain, hlist) {
1789 rt6_age_examine_exception(bucket, rt6_ex,
1790 gc_args, now);
1791 }
1792 bucket++;
1793 }
1794 }
1bfa26ff
ED
1795 spin_unlock(&rt6_exception_lock);
1796 rcu_read_unlock_bh();
c757faa8
WW
1797}
1798
9ff74384 1799struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
b75cc8f9
DA
1800 int oif, struct flowi6 *fl6,
1801 const struct sk_buff *skb, int flags)
1da177e4 1802{
367efcb9 1803 struct fib6_node *fn, *saved_fn;
2b760fcf 1804 struct rt6_info *rt, *rt_cache;
c71099ac 1805 int strict = 0;
1da177e4 1806
77d16f45 1807 strict |= flags & RT6_LOOKUP_F_IFACE;
d5d32e4b 1808 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
367efcb9
MKL
1809 if (net->ipv6.devconf_all->forwarding == 0)
1810 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1811
66f5d6ce 1812 rcu_read_lock();
1da177e4 1813
4c9483b2 1814 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1815 saved_fn = fn;
1da177e4 1816
ca254490
DA
1817 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1818 oif = 0;
1819
a3c00e46 1820redo_rt6_select:
8d1040e8 1821 rt = rt6_select(net, fn, oif, strict);
52bd4c0c 1822 if (rt->rt6i_nsiblings)
b4bac172 1823 rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict);
a3c00e46
MKL
1824 if (rt == net->ipv6.ip6_null_entry) {
1825 fn = fib6_backtrack(fn, &fl6->saddr);
1826 if (fn)
1827 goto redo_rt6_select;
367efcb9
MKL
1828 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1829 /* also consider unreachable route */
1830 strict &= ~RT6_LOOKUP_F_REACHABLE;
1831 fn = saved_fn;
1832 goto redo_rt6_select;
367efcb9 1833 }
a3c00e46
MKL
1834 }
1835
2b760fcf
WW
1836 /*Search through exception table */
1837 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
1838 if (rt_cache)
1839 rt = rt_cache;
fb9de91e 1840
d3843fe5 1841 if (rt == net->ipv6.ip6_null_entry) {
66f5d6ce 1842 rcu_read_unlock();
d3843fe5 1843 dst_hold(&rt->dst);
b65f164d 1844 trace_fib6_table_lookup(net, rt, table, fl6);
d3843fe5
WW
1845 return rt;
1846 } else if (rt->rt6i_flags & RTF_CACHE) {
1847 if (ip6_hold_safe(net, &rt, true)) {
1848 dst_use_noref(&rt->dst, jiffies);
1849 rt6_dst_from_metrics_check(rt);
1850 }
66f5d6ce 1851 rcu_read_unlock();
b65f164d 1852 trace_fib6_table_lookup(net, rt, table, fl6);
d52d3997 1853 return rt;
3da59bd9
MKL
1854 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1855 !(rt->rt6i_flags & RTF_GATEWAY))) {
1856 /* Create a RTF_CACHE clone which will not be
1857 * owned by the fib6 tree. It is for the special case where
1858 * the daddr in the skb during the neighbor look-up is different
1859 * from the fl6->daddr used to look-up route here.
1860 */
1861
1862 struct rt6_info *uncached_rt;
1863
d3843fe5
WW
1864 if (ip6_hold_safe(net, &rt, true)) {
1865 dst_use_noref(&rt->dst, jiffies);
1866 } else {
66f5d6ce 1867 rcu_read_unlock();
d3843fe5
WW
1868 uncached_rt = rt;
1869 goto uncached_rt_out;
1870 }
66f5d6ce 1871 rcu_read_unlock();
d52d3997 1872
3da59bd9
MKL
1873 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1874 dst_release(&rt->dst);
c71099ac 1875
1cfb71ee
WW
1876 if (uncached_rt) {
1877 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1878 * No need for another dst_hold()
1879 */
8d0b94af 1880 rt6_uncached_list_add(uncached_rt);
81eb8447 1881 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1882 } else {
3da59bd9 1883 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1884 dst_hold(&uncached_rt->dst);
1885 }
b811580d 1886
d3843fe5 1887uncached_rt_out:
b65f164d 1888 trace_fib6_table_lookup(net, uncached_rt, table, fl6);
3da59bd9 1889 return uncached_rt;
3da59bd9 1890
d52d3997
MKL
1891 } else {
1892 /* Get a percpu copy */
1893
1894 struct rt6_info *pcpu_rt;
1895
d3843fe5 1896 dst_use_noref(&rt->dst, jiffies);
951f788a 1897 local_bh_disable();
d52d3997 1898 pcpu_rt = rt6_get_pcpu_route(rt);
d52d3997 1899
951f788a 1900 if (!pcpu_rt) {
a94b9367
WW
1901 /* atomic_inc_not_zero() is needed when using rcu */
1902 if (atomic_inc_not_zero(&rt->rt6i_ref)) {
951f788a 1903 /* No dst_hold() on rt is needed because grabbing
a94b9367
WW
1904 * rt->rt6i_ref makes sure rt can't be released.
1905 */
afb1d4b5 1906 pcpu_rt = rt6_make_pcpu_route(net, rt);
a94b9367
WW
1907 rt6_release(rt);
1908 } else {
1909 /* rt is already removed from tree */
a94b9367
WW
1910 pcpu_rt = net->ipv6.ip6_null_entry;
1911 dst_hold(&pcpu_rt->dst);
1912 }
9c7370a1 1913 }
951f788a
ED
1914 local_bh_enable();
1915 rcu_read_unlock();
b65f164d 1916 trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
d52d3997
MKL
1917 return pcpu_rt;
1918 }
1da177e4 1919}
9ff74384 1920EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1921
b75cc8f9
DA
1922static struct rt6_info *ip6_pol_route_input(struct net *net,
1923 struct fib6_table *table,
1924 struct flowi6 *fl6,
1925 const struct sk_buff *skb,
1926 int flags)
4acad72d 1927{
b75cc8f9 1928 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
4acad72d
PE
1929}
1930
d409b847
MB
1931struct dst_entry *ip6_route_input_lookup(struct net *net,
1932 struct net_device *dev,
b75cc8f9
DA
1933 struct flowi6 *fl6,
1934 const struct sk_buff *skb,
1935 int flags)
72331bc0
SL
1936{
1937 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1938 flags |= RT6_LOOKUP_F_IFACE;
1939
b75cc8f9 1940 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
72331bc0 1941}
d409b847 1942EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1943
23aebdac 1944static void ip6_multipath_l3_keys(const struct sk_buff *skb,
5e5d6fed
RP
1945 struct flow_keys *keys,
1946 struct flow_keys *flkeys)
23aebdac
JS
1947{
1948 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1949 const struct ipv6hdr *key_iph = outer_iph;
5e5d6fed 1950 struct flow_keys *_flkeys = flkeys;
23aebdac
JS
1951 const struct ipv6hdr *inner_iph;
1952 const struct icmp6hdr *icmph;
1953 struct ipv6hdr _inner_iph;
1954
1955 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1956 goto out;
1957
1958 icmph = icmp6_hdr(skb);
1959 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1960 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1961 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1962 icmph->icmp6_type != ICMPV6_PARAMPROB)
1963 goto out;
1964
1965 inner_iph = skb_header_pointer(skb,
1966 skb_transport_offset(skb) + sizeof(*icmph),
1967 sizeof(_inner_iph), &_inner_iph);
1968 if (!inner_iph)
1969 goto out;
1970
1971 key_iph = inner_iph;
5e5d6fed 1972 _flkeys = NULL;
23aebdac 1973out:
5e5d6fed
RP
1974 if (_flkeys) {
1975 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
1976 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
1977 keys->tags.flow_label = _flkeys->tags.flow_label;
1978 keys->basic.ip_proto = _flkeys->basic.ip_proto;
1979 } else {
1980 keys->addrs.v6addrs.src = key_iph->saddr;
1981 keys->addrs.v6addrs.dst = key_iph->daddr;
1982 keys->tags.flow_label = ip6_flowinfo(key_iph);
1983 keys->basic.ip_proto = key_iph->nexthdr;
1984 }
23aebdac
JS
1985}
1986
1987/* if skb is set it will be used and fl6 can be NULL */
b4bac172
DA
1988u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1989 const struct sk_buff *skb, struct flow_keys *flkeys)
23aebdac
JS
1990{
1991 struct flow_keys hash_keys;
9a2a537a 1992 u32 mhash;
23aebdac 1993
bbfa047a 1994 switch (ip6_multipath_hash_policy(net)) {
b4bac172
DA
1995 case 0:
1996 memset(&hash_keys, 0, sizeof(hash_keys));
1997 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1998 if (skb) {
1999 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2000 } else {
2001 hash_keys.addrs.v6addrs.src = fl6->saddr;
2002 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2003 hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
2004 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2005 }
2006 break;
2007 case 1:
2008 if (skb) {
2009 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2010 struct flow_keys keys;
2011
2012 /* short-circuit if we already have L4 hash present */
2013 if (skb->l4_hash)
2014 return skb_get_hash_raw(skb) >> 1;
2015
2016 memset(&hash_keys, 0, sizeof(hash_keys));
2017
2018 if (!flkeys) {
2019 skb_flow_dissect_flow_keys(skb, &keys, flag);
2020 flkeys = &keys;
2021 }
2022 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2023 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2024 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2025 hash_keys.ports.src = flkeys->ports.src;
2026 hash_keys.ports.dst = flkeys->ports.dst;
2027 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2028 } else {
2029 memset(&hash_keys, 0, sizeof(hash_keys));
2030 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2031 hash_keys.addrs.v6addrs.src = fl6->saddr;
2032 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2033 hash_keys.ports.src = fl6->fl6_sport;
2034 hash_keys.ports.dst = fl6->fl6_dport;
2035 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2036 }
2037 break;
23aebdac 2038 }
9a2a537a 2039 mhash = flow_hash_from_keys(&hash_keys);
23aebdac 2040
9a2a537a 2041 return mhash >> 1;
23aebdac
JS
2042}
2043
c71099ac
TG
2044void ip6_route_input(struct sk_buff *skb)
2045{
b71d1d42 2046 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 2047 struct net *net = dev_net(skb->dev);
adaa70bb 2048 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 2049 struct ip_tunnel_info *tun_info;
4c9483b2 2050 struct flowi6 fl6 = {
e0d56fdd 2051 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
2052 .daddr = iph->daddr,
2053 .saddr = iph->saddr,
6502ca52 2054 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
2055 .flowi6_mark = skb->mark,
2056 .flowi6_proto = iph->nexthdr,
c71099ac 2057 };
5e5d6fed 2058 struct flow_keys *flkeys = NULL, _flkeys;
adaa70bb 2059
904af04d 2060 tun_info = skb_tunnel_info(skb);
46fa062a 2061 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 2062 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
5e5d6fed
RP
2063
2064 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2065 flkeys = &_flkeys;
2066
23aebdac 2067 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
b4bac172 2068 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
06e9d040 2069 skb_dst_drop(skb);
b75cc8f9
DA
2070 skb_dst_set(skb,
2071 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
c71099ac
TG
2072}
2073
b75cc8f9
DA
2074static struct rt6_info *ip6_pol_route_output(struct net *net,
2075 struct fib6_table *table,
2076 struct flowi6 *fl6,
2077 const struct sk_buff *skb,
2078 int flags)
1da177e4 2079{
b75cc8f9 2080 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
c71099ac
TG
2081}
2082
6f21c96a
PA
2083struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2084 struct flowi6 *fl6, int flags)
c71099ac 2085{
d46a9d67 2086 bool any_src;
c71099ac 2087
4c1feac5
DA
2088 if (rt6_need_strict(&fl6->daddr)) {
2089 struct dst_entry *dst;
2090
2091 dst = l3mdev_link_scope_lookup(net, fl6);
2092 if (dst)
2093 return dst;
2094 }
ca254490 2095
1fb9489b 2096 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 2097
d46a9d67 2098 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 2099 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 2100 (fl6->flowi6_oif && any_src))
77d16f45 2101 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 2102
d46a9d67 2103 if (!any_src)
adaa70bb 2104 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
2105 else if (sk)
2106 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 2107
b75cc8f9 2108 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
1da177e4 2109}
6f21c96a 2110EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 2111
2774c131 2112struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2113{
5c1e6aa3 2114 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 2115 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
2116 struct dst_entry *new = NULL;
2117
1dbe3252 2118 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 2119 DST_OBSOLETE_DEAD, 0);
14e50e57 2120 if (rt) {
0a1f5962 2121 rt6_info_init(rt);
81eb8447 2122 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 2123
0a1f5962 2124 new = &rt->dst;
14e50e57 2125 new->__use = 1;
352e512c 2126 new->input = dst_discard;
ede2059d 2127 new->output = dst_discard_out;
14e50e57 2128
0a1f5962 2129 dst_copy_metrics(new, &ort->dst);
14e50e57 2130
1dbe3252 2131 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 2132 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 2133 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
2134 rt->rt6i_metric = 0;
2135
2136 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2137#ifdef CONFIG_IPV6_SUBTREES
2138 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2139#endif
14e50e57
DM
2140 }
2141
69ead7af
DM
2142 dst_release(dst_orig);
2143 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 2144}
14e50e57 2145
1da177e4
LT
2146/*
2147 * Destination cache support functions
2148 */
2149
4b32b5ad
MKL
2150static void rt6_dst_from_metrics_check(struct rt6_info *rt)
2151{
3a2232e9
DM
2152 if (rt->from &&
2153 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(&rt->from->dst))
2154 dst_init_metrics(&rt->dst, dst_metrics_ptr(&rt->from->dst), true);
4b32b5ad
MKL
2155}
2156
3da59bd9
MKL
2157static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
2158{
36143645 2159 u32 rt_cookie = 0;
c5cff856
WW
2160
2161 if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
3da59bd9
MKL
2162 return NULL;
2163
2164 if (rt6_check_expired(rt))
2165 return NULL;
2166
2167 return &rt->dst;
2168}
2169
2170static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
2171{
5973fb1e
MKL
2172 if (!__rt6_check_expired(rt) &&
2173 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
3a2232e9 2174 rt6_check(rt->from, cookie))
3da59bd9
MKL
2175 return &rt->dst;
2176 else
2177 return NULL;
2178}
2179
1da177e4
LT
2180static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2181{
2182 struct rt6_info *rt;
2183
2184 rt = (struct rt6_info *) dst;
2185
6f3118b5
ND
2186 /* All IPV6 dsts are created with ->obsolete set to the value
2187 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2188 * into this function always.
2189 */
e3bc10bd 2190
4b32b5ad
MKL
2191 rt6_dst_from_metrics_check(rt);
2192
02bcf4e0 2193 if (rt->rt6i_flags & RTF_PCPU ||
3a2232e9 2194 (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
3da59bd9
MKL
2195 return rt6_dst_from_check(rt, cookie);
2196 else
2197 return rt6_check(rt, cookie);
1da177e4
LT
2198}
2199
2200static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2201{
2202 struct rt6_info *rt = (struct rt6_info *) dst;
2203
2204 if (rt) {
54c1a859
YH
2205 if (rt->rt6i_flags & RTF_CACHE) {
2206 if (rt6_check_expired(rt)) {
afb1d4b5 2207 ip6_del_rt(dev_net(dst->dev), rt);
54c1a859
YH
2208 dst = NULL;
2209 }
2210 } else {
1da177e4 2211 dst_release(dst);
54c1a859
YH
2212 dst = NULL;
2213 }
1da177e4 2214 }
54c1a859 2215 return dst;
1da177e4
LT
2216}
2217
2218static void ip6_link_failure(struct sk_buff *skb)
2219{
2220 struct rt6_info *rt;
2221
3ffe533c 2222 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2223
adf30907 2224 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2225 if (rt) {
1eb4f758 2226 if (rt->rt6i_flags & RTF_CACHE) {
ad65a2f0 2227 if (dst_hold_safe(&rt->dst))
afb1d4b5 2228 ip6_del_rt(dev_net(rt->dst.dev), rt);
c5cff856
WW
2229 } else {
2230 struct fib6_node *fn;
2231
2232 rcu_read_lock();
2233 fn = rcu_dereference(rt->rt6i_node);
2234 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2235 fn->fn_sernum = -1;
2236 rcu_read_unlock();
1eb4f758 2237 }
1da177e4
LT
2238 }
2239}
2240
45e4fd26
MKL
2241static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2242{
2243 struct net *net = dev_net(rt->dst.dev);
2244
2245 rt->rt6i_flags |= RTF_MODIFIED;
2246 rt->rt6i_pmtu = mtu;
2247 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2248}
2249
0d3f6d29
MKL
2250static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2251{
2252 return !(rt->rt6i_flags & RTF_CACHE) &&
4e587ea7
WW
2253 (rt->rt6i_flags & RTF_PCPU ||
2254 rcu_access_pointer(rt->rt6i_node));
0d3f6d29
MKL
2255}
2256
45e4fd26
MKL
2257static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2258 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2259{
0dec879f 2260 const struct in6_addr *daddr, *saddr;
67ba4152 2261 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2262
45e4fd26
MKL
2263 if (rt6->rt6i_flags & RTF_LOCAL)
2264 return;
81aded24 2265
19bda36c
XL
2266 if (dst_metric_locked(dst, RTAX_MTU))
2267 return;
2268
0dec879f
JA
2269 if (iph) {
2270 daddr = &iph->daddr;
2271 saddr = &iph->saddr;
2272 } else if (sk) {
2273 daddr = &sk->sk_v6_daddr;
2274 saddr = &inet6_sk(sk)->saddr;
2275 } else {
2276 daddr = NULL;
2277 saddr = NULL;
2278 }
2279 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2280 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2281 if (mtu >= dst_mtu(dst))
2282 return;
9d289715 2283
0d3f6d29 2284 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2285 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2286 /* update rt6_ex->stamp for cache */
2287 if (rt6->rt6i_flags & RTF_CACHE)
2288 rt6_update_exception_stamp_rt(rt6);
0dec879f 2289 } else if (daddr) {
45e4fd26
MKL
2290 struct rt6_info *nrt6;
2291
45e4fd26
MKL
2292 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
2293 if (nrt6) {
2294 rt6_do_update_pmtu(nrt6, mtu);
2b760fcf
WW
2295 if (rt6_insert_exception(nrt6, rt6))
2296 dst_release_immediate(&nrt6->dst);
45e4fd26 2297 }
1da177e4
LT
2298 }
2299}
2300
45e4fd26
MKL
2301static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2302 struct sk_buff *skb, u32 mtu)
2303{
2304 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2305}
2306
42ae66c8 2307void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2308 int oif, u32 mark, kuid_t uid)
81aded24
DM
2309{
2310 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2311 struct dst_entry *dst;
2312 struct flowi6 fl6;
2313
2314 memset(&fl6, 0, sizeof(fl6));
2315 fl6.flowi6_oif = oif;
1b3c61dc 2316 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
2317 fl6.daddr = iph->daddr;
2318 fl6.saddr = iph->saddr;
6502ca52 2319 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2320 fl6.flowi6_uid = uid;
81aded24
DM
2321
2322 dst = ip6_route_output(net, NULL, &fl6);
2323 if (!dst->error)
45e4fd26 2324 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2325 dst_release(dst);
2326}
2327EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2328
2329void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2330{
33c162a9
MKL
2331 struct dst_entry *dst;
2332
81aded24 2333 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 2334 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2335
2336 dst = __sk_dst_get(sk);
2337 if (!dst || !dst->obsolete ||
2338 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2339 return;
2340
2341 bh_lock_sock(sk);
2342 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2343 ip6_datagram_dst_update(sk, false);
2344 bh_unlock_sock(sk);
81aded24
DM
2345}
2346EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2347
7d6850f7
AK
2348void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2349 const struct flowi6 *fl6)
2350{
2351#ifdef CONFIG_IPV6_SUBTREES
2352 struct ipv6_pinfo *np = inet6_sk(sk);
2353#endif
2354
2355 ip6_dst_store(sk, dst,
2356 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2357 &sk->sk_v6_daddr : NULL,
2358#ifdef CONFIG_IPV6_SUBTREES
2359 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2360 &np->saddr :
2361#endif
2362 NULL);
2363}
2364
b55b76b2
DJ
2365/* Handle redirects */
/*
 * Flow key for redirect lookups: an ordinary flowi6 followed by the
 * address of the gateway to match against.  fl6 must stay the first
 * member, because the lookup callback receives a pointer to it and casts
 * that pointer back to struct ip6rd_flowi.
 */
2366struct ip6rd_flowi {
2367 struct flowi6 fl6;
2368 struct in6_addr gateway;
2369};
2370
2371static struct rt6_info *__ip6_route_redirect(struct net *net,
2372 struct fib6_table *table,
2373 struct flowi6 *fl6,
b75cc8f9 2374 const struct sk_buff *skb,
b55b76b2
DJ
2375 int flags)
2376{
2377 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2b760fcf 2378 struct rt6_info *rt, *rt_cache;
b55b76b2
DJ
2379 struct fib6_node *fn;
2380
2381 /* Get the "current" route for this destination and
67c408cf 2382 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2383 *
2384 * RFC 4861 specifies that redirects should only be
2385 * accepted if they come from the nexthop to the target.
2386 * Due to the way the routes are chosen, this notion
2387 * is a bit fuzzy and one might need to check all possible
2388 * routes.
2389 */
2390
66f5d6ce 2391 rcu_read_lock();
b55b76b2
DJ
2392 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2393restart:
66f5d6ce 2394 for_each_fib6_node_rt_rcu(fn) {
5e670d84 2395 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c 2396 continue;
b55b76b2
DJ
2397 if (rt6_check_expired(rt))
2398 continue;
6edb3c96 2399 if (rt->rt6i_flags & RTF_REJECT)
b55b76b2
DJ
2400 break;
2401 if (!(rt->rt6i_flags & RTF_GATEWAY))
2402 continue;
5e670d84 2403 if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
b55b76b2 2404 continue;
2b760fcf
WW
2405 /* rt_cache's gateway might be different from its 'parent'
2406 * in the case of an ip redirect.
2407 * So we keep searching in the exception table if the gateway
2408 * is different.
2409 */
5e670d84 2410 if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
2b760fcf
WW
2411 rt_cache = rt6_find_cached_rt(rt,
2412 &fl6->daddr,
2413 &fl6->saddr);
2414 if (rt_cache &&
2415 ipv6_addr_equal(&rdfl->gateway,
2416 &rt_cache->rt6i_gateway)) {
2417 rt = rt_cache;
2418 break;
2419 }
b55b76b2 2420 continue;
2b760fcf 2421 }
b55b76b2
DJ
2422 break;
2423 }
2424
2425 if (!rt)
2426 rt = net->ipv6.ip6_null_entry;
6edb3c96 2427 else if (rt->rt6i_flags & RTF_REJECT) {
b55b76b2 2428 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2429 goto out;
2430 }
2431
2432 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
2433 fn = fib6_backtrack(fn, &fl6->saddr);
2434 if (fn)
2435 goto restart;
b55b76b2 2436 }
a3c00e46 2437
b0a1ba59 2438out:
d3843fe5 2439 ip6_hold_safe(net, &rt, true);
b55b76b2 2440
66f5d6ce 2441 rcu_read_unlock();
b55b76b2 2442
b65f164d 2443 trace_fib6_table_lookup(net, rt, table, fl6);
b55b76b2
DJ
2444 return rt;
2445};
2446
2447static struct dst_entry *ip6_route_redirect(struct net *net,
b75cc8f9
DA
2448 const struct flowi6 *fl6,
2449 const struct sk_buff *skb,
2450 const struct in6_addr *gateway)
b55b76b2
DJ
2451{
2452 int flags = RT6_LOOKUP_F_HAS_SADDR;
2453 struct ip6rd_flowi rdfl;
2454
2455 rdfl.fl6 = *fl6;
2456 rdfl.gateway = *gateway;
2457
b75cc8f9 2458 return fib6_rule_lookup(net, &rdfl.fl6, skb,
b55b76b2
DJ
2459 flags, __ip6_route_redirect);
2460}
2461
e2d118a1
LC
2462void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2463 kuid_t uid)
3a5ad2ee
DM
2464{
2465 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2466 struct dst_entry *dst;
2467 struct flowi6 fl6;
2468
2469 memset(&fl6, 0, sizeof(fl6));
e374c618 2470 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
2471 fl6.flowi6_oif = oif;
2472 fl6.flowi6_mark = mark;
3a5ad2ee
DM
2473 fl6.daddr = iph->daddr;
2474 fl6.saddr = iph->saddr;
6502ca52 2475 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2476 fl6.flowi6_uid = uid;
3a5ad2ee 2477
b75cc8f9 2478 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
b55b76b2 2479 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2480 dst_release(dst);
2481}
2482EXPORT_SYMBOL_GPL(ip6_redirect);
2483
c92a59ec
DJ
2484void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2485 u32 mark)
2486{
2487 const struct ipv6hdr *iph = ipv6_hdr(skb);
2488 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2489 struct dst_entry *dst;
2490 struct flowi6 fl6;
2491
2492 memset(&fl6, 0, sizeof(fl6));
e374c618 2493 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
2494 fl6.flowi6_oif = oif;
2495 fl6.flowi6_mark = mark;
c92a59ec
DJ
2496 fl6.daddr = msg->dest;
2497 fl6.saddr = iph->daddr;
e2d118a1 2498 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 2499
b75cc8f9 2500 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
b55b76b2 2501 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2502 dst_release(dst);
2503}
2504
3a5ad2ee
DM
2505void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2506{
e2d118a1
LC
2507 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2508 sk->sk_uid);
3a5ad2ee
DM
2509}
2510EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2511
0dbaee3b 2512static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2513{
0dbaee3b
DM
2514 struct net_device *dev = dst->dev;
2515 unsigned int mtu = dst_mtu(dst);
2516 struct net *net = dev_net(dev);
2517
1da177e4
LT
2518 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2519
5578689a
DL
2520 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2521 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2522
2523 /*
1ab1457c
YH
2524 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2525 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2526 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2527 * rely only on pmtu discovery"
2528 */
2529 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2530 mtu = IPV6_MAXPLEN;
2531 return mtu;
2532}
2533
ebb762f2 2534static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2535{
4b32b5ad
MKL
2536 const struct rt6_info *rt = (const struct rt6_info *)dst;
2537 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 2538 struct inet6_dev *idev;
618f9bc7 2539
4b32b5ad
MKL
2540 if (mtu)
2541 goto out;
2542
2543 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2544 if (mtu)
30f78d8e 2545 goto out;
618f9bc7
SK
2546
2547 mtu = IPV6_MIN_MTU;
d33e4553
DM
2548
2549 rcu_read_lock();
2550 idev = __in6_dev_get(dst->dev);
2551 if (idev)
2552 mtu = idev->cnf.mtu6;
2553 rcu_read_unlock();
2554
30f78d8e 2555out:
14972cbd
RP
2556 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2557
2558 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2559}
2560
3b00944c 2561struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2562 struct flowi6 *fl6)
1da177e4 2563{
87a11578 2564 struct dst_entry *dst;
1da177e4
LT
2565 struct rt6_info *rt;
2566 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2567 struct net *net = dev_net(dev);
1da177e4 2568
38308473 2569 if (unlikely(!idev))
122bdf67 2570 return ERR_PTR(-ENODEV);
1da177e4 2571
ad706862 2572 rt = ip6_dst_alloc(net, dev, 0);
38308473 2573 if (unlikely(!rt)) {
1da177e4 2574 in6_dev_put(idev);
87a11578 2575 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2576 goto out;
2577 }
2578
8e2ec639 2579 rt->dst.flags |= DST_HOST;
588753f1 2580 rt->dst.input = ip6_input;
8e2ec639 2581 rt->dst.output = ip6_output;
550bab42 2582 rt->rt6i_gateway = fl6->daddr;
87a11578 2583 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2584 rt->rt6i_dst.plen = 128;
2585 rt->rt6i_idev = idev;
14edd87d 2586 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2587
4c981e28 2588 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2589 * do proper release of the net_device
2590 */
2591 rt6_uncached_list_add(rt);
81eb8447 2592 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2593
87a11578
DM
2594 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2595
1da177e4 2596out:
87a11578 2597 return dst;
1da177e4
LT
2598}
2599
569d3645 2600static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2601{
86393e52 2602 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2603 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2604 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2605 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2606 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2607 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2608 int entries;
7019b78e 2609
fc66f95c 2610 entries = dst_entries_get_fast(ops);
49a18d86 2611 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2612 entries <= rt_max_size)
1da177e4
LT
2613 goto out;
2614
6891a346 2615 net->ipv6.ip6_rt_gc_expire++;
14956643 2616 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2617 entries = dst_entries_get_slow(ops);
2618 if (entries < ops->gc_thresh)
7019b78e 2619 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2620out:
7019b78e 2621 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2622 return entries > rt_max_size;
1da177e4
LT
2623}
2624
e715b6d3
FW
2625static int ip6_convert_metrics(struct mx6_config *mxc,
2626 const struct fib6_config *cfg)
2627{
6670e152 2628 struct net *net = cfg->fc_nlinfo.nl_net;
c3a8d947 2629 bool ecn_ca = false;
e715b6d3
FW
2630 struct nlattr *nla;
2631 int remaining;
2632 u32 *mp;
2633
63159f29 2634 if (!cfg->fc_mx)
e715b6d3
FW
2635 return 0;
2636
2637 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
2638 if (unlikely(!mp))
2639 return -ENOMEM;
2640
2641 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
2642 int type = nla_type(nla);
1bb14807 2643 u32 val;
e715b6d3 2644
1bb14807
DB
2645 if (!type)
2646 continue;
2647 if (unlikely(type > RTAX_MAX))
2648 goto err;
ea697639 2649
1bb14807
DB
2650 if (type == RTAX_CC_ALGO) {
2651 char tmp[TCP_CA_NAME_MAX];
e715b6d3 2652
1bb14807 2653 nla_strlcpy(tmp, nla, sizeof(tmp));
6670e152 2654 val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca);
1bb14807
DB
2655 if (val == TCP_CA_UNSPEC)
2656 goto err;
2657 } else {
2658 val = nla_get_u32(nla);
e715b6d3 2659 }
626abd59
PA
2660 if (type == RTAX_HOPLIMIT && val > 255)
2661 val = 255;
b8d3e416
DB
2662 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
2663 goto err;
1bb14807
DB
2664
2665 mp[type - 1] = val;
2666 __set_bit(type - 1, mxc->mx_valid);
e715b6d3
FW
2667 }
2668
c3a8d947
DB
2669 if (ecn_ca) {
2670 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
2671 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
2672 }
e715b6d3 2673
c3a8d947 2674 mxc->mx = mp;
e715b6d3
FW
2675 return 0;
2676 err:
2677 kfree(mp);
2678 return -EINVAL;
2679}
1da177e4 2680
8c14586f
DA
2681static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2682 struct fib6_config *cfg,
f4797b33
DA
2683 const struct in6_addr *gw_addr,
2684 u32 tbid, int flags)
8c14586f
DA
2685{
2686 struct flowi6 fl6 = {
2687 .flowi6_oif = cfg->fc_ifindex,
2688 .daddr = *gw_addr,
2689 .saddr = cfg->fc_prefsrc,
2690 };
2691 struct fib6_table *table;
2692 struct rt6_info *rt;
8c14586f 2693
f4797b33 2694 table = fib6_get_table(net, tbid);
8c14586f
DA
2695 if (!table)
2696 return NULL;
2697
2698 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2699 flags |= RT6_LOOKUP_F_HAS_SADDR;
2700
f4797b33 2701 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
b75cc8f9 2702 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
8c14586f
DA
2703
2704 /* if table lookup failed, fall back to full lookup */
2705 if (rt == net->ipv6.ip6_null_entry) {
2706 ip6_rt_put(rt);
2707 rt = NULL;
2708 }
2709
2710 return rt;
2711}
2712
fc1e64e1
DA
2713static int ip6_route_check_nh_onlink(struct net *net,
2714 struct fib6_config *cfg,
9fbb704c 2715 const struct net_device *dev,
fc1e64e1
DA
2716 struct netlink_ext_ack *extack)
2717{
44750f84 2718 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
fc1e64e1
DA
2719 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2720 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2721 struct rt6_info *grt;
2722 int err;
2723
2724 err = 0;
2725 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2726 if (grt) {
58e354c0
DA
2727 if (!grt->dst.error &&
2728 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
44750f84
DA
2729 NL_SET_ERR_MSG(extack,
2730 "Nexthop has invalid gateway or device mismatch");
fc1e64e1
DA
2731 err = -EINVAL;
2732 }
2733
2734 ip6_rt_put(grt);
2735 }
2736
2737 return err;
2738}
2739
1edce99f
DA
2740static int ip6_route_check_nh(struct net *net,
2741 struct fib6_config *cfg,
2742 struct net_device **_dev,
2743 struct inet6_dev **idev)
2744{
2745 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2746 struct net_device *dev = _dev ? *_dev : NULL;
2747 struct rt6_info *grt = NULL;
2748 int err = -EHOSTUNREACH;
2749
2750 if (cfg->fc_table) {
f4797b33
DA
2751 int flags = RT6_LOOKUP_F_IFACE;
2752
2753 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2754 cfg->fc_table, flags);
1edce99f
DA
2755 if (grt) {
2756 if (grt->rt6i_flags & RTF_GATEWAY ||
2757 (dev && dev != grt->dst.dev)) {
2758 ip6_rt_put(grt);
2759 grt = NULL;
2760 }
2761 }
2762 }
2763
2764 if (!grt)
b75cc8f9 2765 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
1edce99f
DA
2766
2767 if (!grt)
2768 goto out;
2769
2770 if (dev) {
2771 if (dev != grt->dst.dev) {
2772 ip6_rt_put(grt);
2773 goto out;
2774 }
2775 } else {
2776 *_dev = dev = grt->dst.dev;
2777 *idev = grt->rt6i_idev;
2778 dev_hold(dev);
2779 in6_dev_hold(grt->rt6i_idev);
2780 }
2781
2782 if (!(grt->rt6i_flags & RTF_GATEWAY))
2783 err = 0;
2784
2785 ip6_rt_put(grt);
2786
2787out:
2788 return err;
2789}
2790
9fbb704c
DA
2791static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2792 struct net_device **_dev, struct inet6_dev **idev,
2793 struct netlink_ext_ack *extack)
2794{
2795 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2796 int gwa_type = ipv6_addr_type(gw_addr);
232378e8 2797 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
9fbb704c 2798 const struct net_device *dev = *_dev;
232378e8 2799 bool need_addr_check = !dev;
9fbb704c
DA
2800 int err = -EINVAL;
2801
2802 /* if gw_addr is local we will fail to detect this in case
2803 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2804 * will return already-added prefix route via interface that
2805 * prefix route was assigned to, which might be non-loopback.
2806 */
232378e8
DA
2807 if (dev &&
2808 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2809 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
9fbb704c
DA
2810 goto out;
2811 }
2812
2813 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2814 /* IPv6 strictly inhibits using not link-local
2815 * addresses as nexthop address.
2816 * Otherwise, router will not able to send redirects.
2817 * It is very good, but in some (rare!) circumstances
2818 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2819 * some exceptions. --ANK
2820 * We allow IPv4-mapped nexthops to support RFC4798-type
2821 * addressing
2822 */
2823 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2824 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2825 goto out;
2826 }
2827
2828 if (cfg->fc_flags & RTNH_F_ONLINK)
2829 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2830 else
2831 err = ip6_route_check_nh(net, cfg, _dev, idev);
2832
2833 if (err)
2834 goto out;
2835 }
2836
2837 /* reload in case device was changed */
2838 dev = *_dev;
2839
2840 err = -EINVAL;
2841 if (!dev) {
2842 NL_SET_ERR_MSG(extack, "Egress device not specified");
2843 goto out;
2844 } else if (dev->flags & IFF_LOOPBACK) {
2845 NL_SET_ERR_MSG(extack,
2846 "Egress device can not be loopback device for this route");
2847 goto out;
2848 }
232378e8
DA
2849
2850 /* if we did not check gw_addr above, do so now that the
2851 * egress device has been resolved.
2852 */
2853 if (need_addr_check &&
2854 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2855 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2856 goto out;
2857 }
2858
9fbb704c
DA
2859 err = 0;
2860out:
2861 return err;
2862}
2863
333c4301
DA
/*
 * Validate the route configuration in @cfg and build an rt6_info for it.
 *
 * The configuration is checked (internal flags, route type, prefix
 * lengths, device, onlink constraints), the fib table is looked up or
 * created, the route is allocated, and its fields are filled in from
 * @cfg.  Reject routes, and routes via a loopback device to a
 * non-loopback destination, are turned into RTF_REJECT routes on the
 * netns loopback device.
 *
 * On success the route is returned and the references taken here on the
 * device and its inet6_dev are stored in it.  On failure every reference
 * taken so far is dropped and an ERR_PTR is returned; where a message is
 * set, @extack describes the problem.
 *
 * Note that err is pre-loaded with the errno for the next failure point
 * before several of the "goto out" checks below.
 */
2864static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
2865 struct netlink_ext_ack *extack)
1da177e4 2866{
5578689a 2867 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
2868 struct rt6_info *rt = NULL;
2869 struct net_device *dev = NULL;
2870 struct inet6_dev *idev = NULL;
c71099ac 2871 struct fib6_table *table;
1da177e4 2872 int addr_type;
8c5b83f0 2873 int err = -EINVAL;
1da177e4 2874
557c44be 2875 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
2876 if (cfg->fc_flags & RTF_PCPU) {
2877 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 2878 goto out;
d5d531cb 2879 }
557c44be 2880
2ea2352e
WW
2881 /* RTF_CACHE is an internal flag; can not be set by userspace */
2882 if (cfg->fc_flags & RTF_CACHE) {
2883 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2884 goto out;
2885 }
2886
e8478e80
DA
2887 if (cfg->fc_type > RTN_MAX) {
2888 NL_SET_ERR_MSG(extack, "Invalid route type");
2889 goto out;
2890 }
2891
d5d531cb
DA
2892 if (cfg->fc_dst_len > 128) {
2893 NL_SET_ERR_MSG(extack, "Invalid prefix length");
2894 goto out;
2895 }
2896 if (cfg->fc_src_len > 128) {
2897 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 2898 goto out;
d5d531cb 2899 }
1da177e4 2900#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
2901 if (cfg->fc_src_len) {
2902 NL_SET_ERR_MSG(extack,
2903 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 2904 goto out;
d5d531cb 2905 }
1da177e4 2906#endif
/* An explicit ifindex pins the egress device; references on both the
 * device and its inet6_dev are held from here on.
 */
86872cb5 2907 if (cfg->fc_ifindex) {
1da177e4 2908 err = -ENODEV;
5578689a 2909 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
2910 if (!dev)
2911 goto out;
2912 idev = in6_dev_get(dev);
2913 if (!idev)
2914 goto out;
2915 }
2916
/* A zero metric means "not specified": use the default user priority */
86872cb5
TG
2917 if (cfg->fc_metric == 0)
2918 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 2919
fc1e64e1
DA
2920 if (cfg->fc_flags & RTNH_F_ONLINK) {
2921 if (!dev) {
2922 NL_SET_ERR_MSG(extack,
2923 "Nexthop device required for onlink");
2924 err = -ENODEV;
2925 goto out;
2926 }
2927
2928 if (!(dev->flags & IFF_UP)) {
2929 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2930 err = -ENETDOWN;
2931 goto out;
2932 }
2933 }
2934
/* err stays -ENOBUFS if no table can be found or created below.  A
 * request without NLM_F_CREATE still gets a new table, but with a
 * warning.
 */
d71314b4 2935 err = -ENOBUFS;
38308473
DM
2936 if (cfg->fc_nlinfo.nlh &&
2937 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 2938 table = fib6_get_table(net, cfg->fc_table);
38308473 2939 if (!table) {
f3213831 2940 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
2941 table = fib6_new_table(net, cfg->fc_table);
2942 }
2943 } else {
2944 table = fib6_new_table(net, cfg->fc_table);
2945 }
38308473
DM
2946
2947 if (!table)
c71099ac 2948 goto out;
c71099ac 2949
/* Routes without RTF_ADDRCONF are allocated with DST_NOCOUNT */
ad706862
MKL
2950 rt = ip6_dst_alloc(net, NULL,
2951 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1da177e4 2952
38308473 2953 if (!rt) {
1da177e4
LT
2954 err = -ENOMEM;
2955 goto out;
2956 }
2957
1716a961
G
2958 if (cfg->fc_flags & RTF_EXPIRES)
2959 rt6_set_expires(rt, jiffies +
2960 clock_t_to_jiffies(cfg->fc_expires));
2961 else
2962 rt6_clean_expires(rt);
1da177e4 2963
86872cb5
TG
2964 if (cfg->fc_protocol == RTPROT_UNSPEC)
2965 cfg->fc_protocol = RTPROT_BOOT;
2966 rt->rt6i_protocol = cfg->fc_protocol;
2967
2968 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4 2969
19e42e45
RP
2970 if (cfg->fc_encap) {
2971 struct lwtunnel_state *lwtstate;
2972
30357d7d 2973 err = lwtunnel_build_state(cfg->fc_encap_type,
127eb7cd 2974 cfg->fc_encap, AF_INET6, cfg,
9ae28727 2975 &lwtstate, extack);
19e42e45
RP
2976 if (err)
2977 goto out;
5e670d84 2978 rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
19e42e45
RP
2979 }
2980
86872cb5
TG
2981 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
2982 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 2983 if (rt->rt6i_dst.plen == 128)
e5fd387a 2984 rt->dst.flags |= DST_HOST;
e5fd387a 2985
1da177e4 2986#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
2987 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
2988 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
2989#endif
2990
86872cb5 2991 rt->rt6i_metric = cfg->fc_metric;
5e670d84 2992 rt->fib6_nh.nh_weight = 1;
1da177e4 2993
e8478e80
DA
2994 rt->fib6_type = cfg->fc_type;
2995
1da177e4
LT
2996 /* We cannot add true routes via loopback here,
2997 they would result in kernel looping; promote them to reject routes
2998 */
86872cb5 2999 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
3000 (dev && (dev->flags & IFF_LOOPBACK) &&
3001 !(addr_type & IPV6_ADDR_LOOPBACK) &&
3002 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 3003 /* hold loopback dev/idev if we haven't done so. */
5578689a 3004 if (dev != net->loopback_dev) {
1da177e4
LT
3005 if (dev) {
3006 dev_put(dev);
3007 in6_dev_put(idev);
3008 }
5578689a 3009 dev = net->loopback_dev;
1da177e4
LT
3010 dev_hold(dev);
3011 idev = in6_dev_get(dev);
3012 if (!idev) {
3013 err = -ENODEV;
3014 goto out;
3015 }
3016 }
1da177e4
LT
3017 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
3018 goto install_route;
3019 }
3020
/* Gateway validation may also resolve dev/idev when none was given */
86872cb5 3021 if (cfg->fc_flags & RTF_GATEWAY) {
9fbb704c
DA
3022 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3023 if (err)
48ed7b26 3024 goto out;
1da177e4 3025
5e670d84 3026 rt->fib6_nh.nh_gw = rt->rt6i_gateway = cfg->fc_gateway;
1da177e4
LT
3027 }
3028
3029 err = -ENODEV;
38308473 3030 if (!dev)
1da177e4
LT
3031 goto out;
3032
428604fb
LB
3033 if (idev->cnf.disable_ipv6) {
3034 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3035 err = -EACCES;
3036 goto out;
3037 }
3038
955ec4cb
DA
3039 if (!(dev->flags & IFF_UP)) {
3040 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3041 err = -ENETDOWN;
3042 goto out;
3043 }
3044
/* A preferred source address is accepted only if ipv6_chk_addr()
 * validates it against the egress device.
 */
c3968a85
DW
3045 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3046 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 3047 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
3048 err = -EINVAL;
3049 goto out;
3050 }
4e3fd7a0 3051 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
3052 rt->rt6i_prefsrc.plen = 128;
3053 } else
3054 rt->rt6i_prefsrc.plen = 0;
3055
86872cb5 3056 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
3057
/* Common tail for normal and reject routes: record link state and hand
 * the dev/idev references held above over to the route.
 */
3058install_route:
5609b80a
IS
3059 if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3060 !netif_carrier_ok(dev))
5e670d84
DA
3061 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3062 rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
3063 rt->fib6_nh.nh_dev = rt->dst.dev = dev;
1da177e4 3064 rt->rt6i_idev = idev;
c71099ac 3065 rt->rt6i_table = table;
63152fc0 3066
c346dca1 3067 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 3068
8c5b83f0 3069 return rt;
/* Error exit: drop whatever references were taken before the failure */
6b9ea5a6
RP
3070out:
3071 if (dev)
3072 dev_put(dev);
3073 if (idev)
3074 in6_dev_put(idev);
587fea74
WW
3075 if (rt)
3076 dst_release_immediate(&rt->dst);
6b9ea5a6 3077
8c5b83f0 3078 return ERR_PTR(err);
6b9ea5a6
RP
3079}
3080
333c4301
DA
3081int ip6_route_add(struct fib6_config *cfg,
3082 struct netlink_ext_ack *extack)
6b9ea5a6
RP
3083{
3084 struct mx6_config mxc = { .mx = NULL, };
8c5b83f0 3085 struct rt6_info *rt;
6b9ea5a6
RP
3086 int err;
3087
333c4301 3088 rt = ip6_route_info_create(cfg, extack);
8c5b83f0
RP
3089 if (IS_ERR(rt)) {
3090 err = PTR_ERR(rt);
3091 rt = NULL;
6b9ea5a6 3092 goto out;
8c5b83f0 3093 }
6b9ea5a6 3094
e715b6d3
FW
3095 err = ip6_convert_metrics(&mxc, cfg);
3096 if (err)
3097 goto out;
1da177e4 3098
333c4301 3099 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
e715b6d3
FW
3100
3101 kfree(mxc.mx);
6b9ea5a6 3102
e715b6d3 3103 return err;
1da177e4 3104out:
587fea74
WW
3105 if (rt)
3106 dst_release_immediate(&rt->dst);
6b9ea5a6 3107
1da177e4
LT
3108 return err;
3109}
3110
86872cb5 3111static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4 3112{
afb1d4b5 3113 struct net *net = info->nl_net;
c71099ac 3114 struct fib6_table *table;
afb1d4b5 3115 int err;
1da177e4 3116
a4c2fd7f 3117 if (rt == net->ipv6.ip6_null_entry) {
6825a26c
G
3118 err = -ENOENT;
3119 goto out;
3120 }
6c813a72 3121
c71099ac 3122 table = rt->rt6i_table;
66f5d6ce 3123 spin_lock_bh(&table->tb6_lock);
86872cb5 3124 err = fib6_del(rt, info);
66f5d6ce 3125 spin_unlock_bh(&table->tb6_lock);
1da177e4 3126
6825a26c 3127out:
94e187c0 3128 ip6_rt_put(rt);
1da177e4
LT
3129 return err;
3130}
3131
afb1d4b5 3132int ip6_del_rt(struct net *net, struct rt6_info *rt)
e0a1ad73 3133{
afb1d4b5
DA
3134 struct nl_info info = { .nl_net = net };
3135
528c4ceb 3136 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
3137}
3138
0ae81335
DA
3139static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
3140{
3141 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 3142 struct net *net = info->nl_net;
16a16cd3 3143 struct sk_buff *skb = NULL;
0ae81335 3144 struct fib6_table *table;
e3330039 3145 int err = -ENOENT;
0ae81335 3146
e3330039
WC
3147 if (rt == net->ipv6.ip6_null_entry)
3148 goto out_put;
0ae81335 3149 table = rt->rt6i_table;
66f5d6ce 3150 spin_lock_bh(&table->tb6_lock);
0ae81335
DA
3151
3152 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
3153 struct rt6_info *sibling, *next_sibling;
3154
16a16cd3
DA
3155 /* prefer to send a single notification with all hops */
3156 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3157 if (skb) {
3158 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3159
e3330039 3160 if (rt6_fill_node(net, skb, rt,
16a16cd3
DA
3161 NULL, NULL, 0, RTM_DELROUTE,
3162 info->portid, seq, 0) < 0) {
3163 kfree_skb(skb);
3164 skb = NULL;
3165 } else
3166 info->skip_notify = 1;
3167 }
3168
0ae81335
DA
3169 list_for_each_entry_safe(sibling, next_sibling,
3170 &rt->rt6i_siblings,
3171 rt6i_siblings) {
3172 err = fib6_del(sibling, info);
3173 if (err)
e3330039 3174 goto out_unlock;
0ae81335
DA
3175 }
3176 }
3177
3178 err = fib6_del(rt, info);
e3330039 3179out_unlock:
66f5d6ce 3180 spin_unlock_bh(&table->tb6_lock);
e3330039 3181out_put:
0ae81335 3182 ip6_rt_put(rt);
16a16cd3
DA
3183
3184 if (skb) {
e3330039 3185 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
3186 info->nlh, gfp_any());
3187 }
0ae81335
DA
3188 return err;
3189}
3190
333c4301
DA
3191static int ip6_route_del(struct fib6_config *cfg,
3192 struct netlink_ext_ack *extack)
1da177e4 3193{
2b760fcf 3194 struct rt6_info *rt, *rt_cache;
c71099ac 3195 struct fib6_table *table;
1da177e4 3196 struct fib6_node *fn;
1da177e4
LT
3197 int err = -ESRCH;
3198
5578689a 3199 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
3200 if (!table) {
3201 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 3202 return err;
d5d531cb 3203 }
c71099ac 3204
66f5d6ce 3205 rcu_read_lock();
1da177e4 3206
c71099ac 3207 fn = fib6_locate(&table->tb6_root,
86872cb5 3208 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 3209 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 3210 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 3211
1da177e4 3212 if (fn) {
66f5d6ce 3213 for_each_fib6_node_rt_rcu(fn) {
2b760fcf
WW
3214 if (cfg->fc_flags & RTF_CACHE) {
3215 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3216 &cfg->fc_src);
3217 if (!rt_cache)
3218 continue;
3219 rt = rt_cache;
3220 }
86872cb5 3221 if (cfg->fc_ifindex &&
5e670d84
DA
3222 (!rt->fib6_nh.nh_dev ||
3223 rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
1da177e4 3224 continue;
86872cb5 3225 if (cfg->fc_flags & RTF_GATEWAY &&
5e670d84 3226 !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
1da177e4 3227 continue;
86872cb5 3228 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 3229 continue;
c2ed1880
M
3230 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
3231 continue;
d3843fe5
WW
3232 if (!dst_hold_safe(&rt->dst))
3233 break;
66f5d6ce 3234 rcu_read_unlock();
1da177e4 3235
0ae81335
DA
3236 /* if gateway was specified only delete the one hop */
3237 if (cfg->fc_flags & RTF_GATEWAY)
3238 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3239
3240 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3241 }
3242 }
66f5d6ce 3243 rcu_read_unlock();
1da177e4
LT
3244
3245 return err;
3246}
3247
6700c270 3248static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 3249{
a6279458 3250 struct netevent_redirect netevent;
e8599ff4 3251 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
3252 struct ndisc_options ndopts;
3253 struct inet6_dev *in6_dev;
3254 struct neighbour *neigh;
71bcdba0 3255 struct rd_msg *msg;
6e157b6a
DM
3256 int optlen, on_link;
3257 u8 *lladdr;
e8599ff4 3258
29a3cad5 3259 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 3260 optlen -= sizeof(*msg);
e8599ff4
DM
3261
3262 if (optlen < 0) {
6e157b6a 3263 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
3264 return;
3265 }
3266
71bcdba0 3267 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 3268
71bcdba0 3269 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 3270 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
3271 return;
3272 }
3273
6e157b6a 3274 on_link = 0;
71bcdba0 3275 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 3276 on_link = 1;
71bcdba0 3277 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 3278 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 3279 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
3280 return;
3281 }
3282
3283 in6_dev = __in6_dev_get(skb->dev);
3284 if (!in6_dev)
3285 return;
3286 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3287 return;
3288
3289 /* RFC2461 8.1:
3290 * The IP source address of the Redirect MUST be the same as the current
3291 * first-hop router for the specified ICMP Destination Address.
3292 */
3293
f997c55c 3294 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
3295 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3296 return;
3297 }
6e157b6a
DM
3298
3299 lladdr = NULL;
e8599ff4
DM
3300 if (ndopts.nd_opts_tgt_lladdr) {
3301 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3302 skb->dev);
3303 if (!lladdr) {
3304 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3305 return;
3306 }
3307 }
3308
6e157b6a 3309 rt = (struct rt6_info *) dst;
ec13ad1d 3310 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 3311 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 3312 return;
6e157b6a 3313 }
e8599ff4 3314
6e157b6a
DM
3315 /* Redirect received -> path was valid.
3316 * Look, redirects are sent only in response to data packets,
3317 * so that this nexthop apparently is reachable. --ANK
3318 */
0dec879f 3319 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 3320
71bcdba0 3321 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
3322 if (!neigh)
3323 return;
a6279458 3324
1da177e4
LT
3325 /*
3326 * We have finally decided to accept it.
3327 */
3328
f997c55c 3329 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3330 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3331 NEIGH_UPDATE_F_OVERRIDE|
3332 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3333 NEIGH_UPDATE_F_ISROUTER)),
3334 NDISC_REDIRECT, &ndopts);
1da177e4 3335
83a09abd 3336 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 3337 if (!nrt)
1da177e4
LT
3338 goto out;
3339
3340 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3341 if (on_link)
3342 nrt->rt6i_flags &= ~RTF_GATEWAY;
3343
b91d5329 3344 nrt->rt6i_protocol = RTPROT_REDIRECT;
4e3fd7a0 3345 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3346
2b760fcf
WW
3347 /* No need to remove rt from the exception table if rt is
3348 * a cached route because rt6_insert_exception() will
3349 * takes care of it
3350 */
3351 if (rt6_insert_exception(nrt, rt)) {
3352 dst_release_immediate(&nrt->dst);
3353 goto out;
3354 }
1da177e4 3355
d8d1f30b
CG
3356 netevent.old = &rt->dst;
3357 netevent.new = &nrt->dst;
71bcdba0 3358 netevent.daddr = &msg->dest;
60592833 3359 netevent.neigh = neigh;
8d71740c
TT
3360 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3361
1da177e4 3362out:
e8599ff4 3363 neigh_release(neigh);
6e157b6a
DM
3364}
3365
70ceb4f5 3366#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 3367static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 3368 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3369 const struct in6_addr *gwaddr,
3370 struct net_device *dev)
70ceb4f5 3371{
830218c1
DA
3372 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3373 int ifindex = dev->ifindex;
70ceb4f5
YH
3374 struct fib6_node *fn;
3375 struct rt6_info *rt = NULL;
c71099ac
TG
3376 struct fib6_table *table;
3377
830218c1 3378 table = fib6_get_table(net, tb_id);
38308473 3379 if (!table)
c71099ac 3380 return NULL;
70ceb4f5 3381
66f5d6ce 3382 rcu_read_lock();
38fbeeee 3383 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3384 if (!fn)
3385 goto out;
3386
66f5d6ce 3387 for_each_fib6_node_rt_rcu(fn) {
5e670d84 3388 if (rt->fib6_nh.nh_dev->ifindex != ifindex)
70ceb4f5
YH
3389 continue;
3390 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
3391 continue;
5e670d84 3392 if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
70ceb4f5 3393 continue;
d3843fe5 3394 ip6_hold_safe(NULL, &rt, false);
70ceb4f5
YH
3395 break;
3396 }
3397out:
66f5d6ce 3398 rcu_read_unlock();
70ceb4f5
YH
3399 return rt;
3400}
3401
efa2cea0 3402static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 3403 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3404 const struct in6_addr *gwaddr,
3405 struct net_device *dev,
95c96174 3406 unsigned int pref)
70ceb4f5 3407{
86872cb5 3408 struct fib6_config cfg = {
238fc7ea 3409 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3410 .fc_ifindex = dev->ifindex,
86872cb5
TG
3411 .fc_dst_len = prefixlen,
3412 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3413 RTF_UP | RTF_PREF(pref),
b91d5329 3414 .fc_protocol = RTPROT_RA,
e8478e80 3415 .fc_type = RTN_UNICAST,
15e47304 3416 .fc_nlinfo.portid = 0,
efa2cea0
DL
3417 .fc_nlinfo.nlh = NULL,
3418 .fc_nlinfo.nl_net = net,
86872cb5
TG
3419 };
3420
830218c1 3421 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3422 cfg.fc_dst = *prefix;
3423 cfg.fc_gateway = *gwaddr;
70ceb4f5 3424
e317da96
YH
3425 /* We should treat it as a default route if prefix length is 0. */
3426 if (!prefixlen)
86872cb5 3427 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3428
333c4301 3429 ip6_route_add(&cfg, NULL);
70ceb4f5 3430
830218c1 3431 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3432}
3433#endif
3434
afb1d4b5
DA
3435struct rt6_info *rt6_get_dflt_router(struct net *net,
3436 const struct in6_addr *addr,
3437 struct net_device *dev)
1ab1457c 3438{
830218c1 3439 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
1da177e4 3440 struct rt6_info *rt;
c71099ac 3441 struct fib6_table *table;
1da177e4 3442
afb1d4b5 3443 table = fib6_get_table(net, tb_id);
38308473 3444 if (!table)
c71099ac 3445 return NULL;
1da177e4 3446
66f5d6ce
WW
3447 rcu_read_lock();
3448 for_each_fib6_node_rt_rcu(&table->tb6_root) {
5e670d84 3449 if (dev == rt->fib6_nh.nh_dev &&
045927ff 3450 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
5e670d84 3451 ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
1da177e4
LT
3452 break;
3453 }
3454 if (rt)
d3843fe5 3455 ip6_hold_safe(NULL, &rt, false);
66f5d6ce 3456 rcu_read_unlock();
1da177e4
LT
3457 return rt;
3458}
3459
afb1d4b5
DA
3460struct rt6_info *rt6_add_dflt_router(struct net *net,
3461 const struct in6_addr *gwaddr,
ebacaaa0
YH
3462 struct net_device *dev,
3463 unsigned int pref)
1da177e4 3464{
86872cb5 3465 struct fib6_config cfg = {
ca254490 3466 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3467 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3468 .fc_ifindex = dev->ifindex,
3469 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3470 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3471 .fc_protocol = RTPROT_RA,
e8478e80 3472 .fc_type = RTN_UNICAST,
15e47304 3473 .fc_nlinfo.portid = 0,
5578689a 3474 .fc_nlinfo.nlh = NULL,
afb1d4b5 3475 .fc_nlinfo.nl_net = net,
86872cb5 3476 };
1da177e4 3477
4e3fd7a0 3478 cfg.fc_gateway = *gwaddr;
1da177e4 3479
333c4301 3480 if (!ip6_route_add(&cfg, NULL)) {
830218c1
DA
3481 struct fib6_table *table;
3482
3483 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3484 if (table)
3485 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3486 }
1da177e4 3487
afb1d4b5 3488 return rt6_get_dflt_router(net, gwaddr, dev);
1da177e4
LT
3489}
3490
afb1d4b5
DA
3491static void __rt6_purge_dflt_routers(struct net *net,
3492 struct fib6_table *table)
1da177e4
LT
3493{
3494 struct rt6_info *rt;
3495
3496restart:
66f5d6ce
WW
3497 rcu_read_lock();
3498 for_each_fib6_node_rt_rcu(&table->tb6_root) {
3e8b0ac3
LC
3499 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3500 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d3843fe5 3501 if (dst_hold_safe(&rt->dst)) {
66f5d6ce 3502 rcu_read_unlock();
afb1d4b5 3503 ip6_del_rt(net, rt);
d3843fe5 3504 } else {
66f5d6ce 3505 rcu_read_unlock();
d3843fe5 3506 }
1da177e4
LT
3507 goto restart;
3508 }
3509 }
66f5d6ce 3510 rcu_read_unlock();
830218c1
DA
3511
3512 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3513}
3514
3515void rt6_purge_dflt_routers(struct net *net)
3516{
3517 struct fib6_table *table;
3518 struct hlist_head *head;
3519 unsigned int h;
3520
3521 rcu_read_lock();
3522
3523 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3524 head = &net->ipv6.fib_table_hash[h];
3525 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3526 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
afb1d4b5 3527 __rt6_purge_dflt_routers(net, table);
830218c1
DA
3528 }
3529 }
3530
3531 rcu_read_unlock();
1da177e4
LT
3532}
3533
5578689a
DL
3534static void rtmsg_to_fib6_config(struct net *net,
3535 struct in6_rtmsg *rtmsg,
86872cb5
TG
3536 struct fib6_config *cfg)
3537{
3538 memset(cfg, 0, sizeof(*cfg));
3539
ca254490
DA
3540 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3541 : RT6_TABLE_MAIN;
86872cb5
TG
3542 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3543 cfg->fc_metric = rtmsg->rtmsg_metric;
3544 cfg->fc_expires = rtmsg->rtmsg_info;
3545 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3546 cfg->fc_src_len = rtmsg->rtmsg_src_len;
3547 cfg->fc_flags = rtmsg->rtmsg_flags;
e8478e80 3548 cfg->fc_type = rtmsg->rtmsg_type;
86872cb5 3549
5578689a 3550 cfg->fc_nlinfo.nl_net = net;
f1243c2d 3551
4e3fd7a0
AD
3552 cfg->fc_dst = rtmsg->rtmsg_dst;
3553 cfg->fc_src = rtmsg->rtmsg_src;
3554 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
3555}
3556
5578689a 3557int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3558{
86872cb5 3559 struct fib6_config cfg;
1da177e4
LT
3560 struct in6_rtmsg rtmsg;
3561 int err;
3562
67ba4152 3563 switch (cmd) {
1da177e4
LT
3564 case SIOCADDRT: /* Add a route */
3565 case SIOCDELRT: /* Delete a route */
af31f412 3566 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3567 return -EPERM;
3568 err = copy_from_user(&rtmsg, arg,
3569 sizeof(struct in6_rtmsg));
3570 if (err)
3571 return -EFAULT;
86872cb5 3572
5578689a 3573 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3574
1da177e4
LT
3575 rtnl_lock();
3576 switch (cmd) {
3577 case SIOCADDRT:
333c4301 3578 err = ip6_route_add(&cfg, NULL);
1da177e4
LT
3579 break;
3580 case SIOCDELRT:
333c4301 3581 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3582 break;
3583 default:
3584 err = -EINVAL;
3585 }
3586 rtnl_unlock();
3587
3588 return err;
3ff50b79 3589 }
1da177e4
LT
3590
3591 return -EINVAL;
3592}
3593
3594/*
3595 * Drop the packet on the floor
3596 */
3597
d5fdd6ba 3598static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3599{
612f09e8 3600 int type;
adf30907 3601 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3602 switch (ipstats_mib_noroutes) {
3603 case IPSTATS_MIB_INNOROUTES:
0660e03f 3604 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3605 if (type == IPV6_ADDR_ANY) {
bdb7cc64
SS
3606 IP6_INC_STATS(dev_net(dst->dev),
3607 __in6_dev_get_safely(skb->dev),
3bd653c8 3608 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3609 break;
3610 }
3611 /* FALLTHROUGH */
3612 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3613 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3614 ipstats_mib_noroutes);
612f09e8
YH
3615 break;
3616 }
3ffe533c 3617 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3618 kfree_skb(skb);
3619 return 0;
3620}
3621
9ce8ade0
TG
3622static int ip6_pkt_discard(struct sk_buff *skb)
3623{
612f09e8 3624 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3625}
3626
ede2059d 3627static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3628{
adf30907 3629 skb->dev = skb_dst(skb)->dev;
612f09e8 3630 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3631}
3632
9ce8ade0
TG
3633static int ip6_pkt_prohibit(struct sk_buff *skb)
3634{
612f09e8 3635 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3636}
3637
ede2059d 3638static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3639{
adf30907 3640 skb->dev = skb_dst(skb)->dev;
612f09e8 3641 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3642}
3643
1da177e4
LT
3644/*
3645 * Allocate a dst for local (unicast / anycast) address.
3646 */
3647
afb1d4b5
DA
3648struct rt6_info *addrconf_dst_alloc(struct net *net,
3649 struct inet6_dev *idev,
1da177e4 3650 const struct in6_addr *addr,
8f031519 3651 bool anycast)
1da177e4 3652{
ca254490 3653 u32 tb_id;
4832c30d 3654 struct net_device *dev = idev->dev;
5f02ce24
DA
3655 struct rt6_info *rt;
3656
5f02ce24 3657 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
a3300ef4 3658 if (!rt)
1da177e4
LT
3659 return ERR_PTR(-ENOMEM);
3660
1da177e4 3661 in6_dev_hold(idev);
1da177e4 3662 rt->rt6i_idev = idev;
1da177e4 3663
6edb3c96 3664 rt->dst.flags |= DST_HOST;
94b5e0f9 3665 rt->rt6i_protocol = RTPROT_KERNEL;
1da177e4 3666 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
e8478e80
DA
3667 if (anycast) {
3668 rt->fib6_type = RTN_ANYCAST;
58c4fb86 3669 rt->rt6i_flags |= RTF_ANYCAST;
e8478e80
DA
3670 } else {
3671 rt->fib6_type = RTN_LOCAL;
1da177e4 3672 rt->rt6i_flags |= RTF_LOCAL;
e8478e80 3673 }
1da177e4 3674
5e670d84
DA
3675 rt->fib6_nh.nh_gw = *addr;
3676 rt->fib6_nh.nh_dev = dev;
550bab42 3677 rt->rt6i_gateway = *addr;
4e3fd7a0 3678 rt->rt6i_dst.addr = *addr;
1da177e4 3679 rt->rt6i_dst.plen = 128;
ca254490
DA
3680 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3681 rt->rt6i_table = fib6_get_table(net, tb_id);
1da177e4 3682
1da177e4
LT
3683 return rt;
3684}
3685
c3968a85
DW
/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any device */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address compared with each route's prefsrc */
};
3692
3693static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
3694{
3695 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3696 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3697 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3698
5e670d84 3699 if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
c3968a85
DW
3700 rt != net->ipv6.ip6_null_entry &&
3701 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
60006a48 3702 spin_lock_bh(&rt6_exception_lock);
c3968a85
DW
3703 /* remove prefsrc entry */
3704 rt->rt6i_prefsrc.plen = 0;
60006a48
WW
3705 /* need to update cache as well */
3706 rt6_exceptions_remove_prefsrc(rt);
3707 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3708 }
3709 return 0;
3710}
3711
3712void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3713{
3714 struct net *net = dev_net(ifp->idev->dev);
3715 struct arg_dev_net_ip adni = {
3716 .dev = ifp->idev->dev,
3717 .net = net,
3718 .addr = &ifp->addr,
3719 };
0c3584d5 3720 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3721}
3722
be7a010d 3723#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
be7a010d
DJ
3724
3725/* Remove routers and update dst entries when gateway turn into host. */
3726static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
3727{
3728 struct in6_addr *gateway = (struct in6_addr *)arg;
3729
2b760fcf 3730 if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
5e670d84 3731 ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
be7a010d
DJ
3732 return -1;
3733 }
b16cb459
WW
3734
3735 /* Further clean up cached routes in exception table.
3736 * This is needed because cached route may have a different
3737 * gateway than its 'parent' in the case of an ip redirect.
3738 */
3739 rt6_exceptions_clean_tohost(rt, gateway);
3740
be7a010d
DJ
3741 return 0;
3742}
3743
3744void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3745{
3746 fib6_clean_all(net, fib6_clean_tohost, gateway);
3747}
3748
2127d95a
IS
/* Argument handed to the fib6_ifup() / fib6_ifdown() route walkers. */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event is about */
	union {
		unsigned int nh_flags;	/* fib6_ifup: nexthop flags to clear */
		unsigned long event;	/* fib6_ifdown: NETDEV_* event code */
	};
};
3756
d7dedee1
IS
3757static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
3758{
3759 struct rt6_info *iter;
3760 struct fib6_node *fn;
3761
3762 fn = rcu_dereference_protected(rt->rt6i_node,
3763 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3764 iter = rcu_dereference_protected(fn->leaf,
3765 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3766 while (iter) {
3767 if (iter->rt6i_metric == rt->rt6i_metric &&
3768 rt6_qualify_for_ecmp(iter))
3769 return iter;
3770 iter = rcu_dereference_protected(iter->rt6_next,
3771 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3772 }
3773
3774 return NULL;
3775}
3776
3777static bool rt6_is_dead(const struct rt6_info *rt)
3778{
5e670d84
DA
3779 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
3780 (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
d7dedee1
IS
3781 rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
3782 return true;
3783
3784 return false;
3785}
3786
3787static int rt6_multipath_total_weight(const struct rt6_info *rt)
3788{
3789 struct rt6_info *iter;
3790 int total = 0;
3791
3792 if (!rt6_is_dead(rt))
5e670d84 3793 total += rt->fib6_nh.nh_weight;
d7dedee1
IS
3794
3795 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
3796 if (!rt6_is_dead(iter))
5e670d84 3797 total += iter->fib6_nh.nh_weight;
d7dedee1
IS
3798 }
3799
3800 return total;
3801}
3802
3803static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
3804{
3805 int upper_bound = -1;
3806
3807 if (!rt6_is_dead(rt)) {
5e670d84 3808 *weight += rt->fib6_nh.nh_weight;
d7dedee1
IS
3809 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3810 total) - 1;
3811 }
5e670d84 3812 atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
d7dedee1
IS
3813}
3814
3815static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
3816{
3817 struct rt6_info *iter;
3818 int weight = 0;
3819
3820 rt6_upper_bound_set(rt, &weight, total);
3821
3822 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3823 rt6_upper_bound_set(iter, &weight, total);
3824}
3825
3826void rt6_multipath_rebalance(struct rt6_info *rt)
3827{
3828 struct rt6_info *first;
3829 int total;
3830
3831 /* In case the entire multipath route was marked for flushing,
3832 * then there is no need to rebalance upon the removal of every
3833 * sibling route.
3834 */
3835 if (!rt->rt6i_nsiblings || rt->should_flush)
3836 return;
3837
3838 /* During lookup routes are evaluated in order, so we need to
3839 * make sure upper bounds are assigned from the first sibling
3840 * onwards.
3841 */
3842 first = rt6_multipath_first_sibling(rt);
3843 if (WARN_ON_ONCE(!first))
3844 return;
3845
3846 total = rt6_multipath_total_weight(first);
3847 rt6_multipath_upper_bound_set(first, total);
3848}
3849
2127d95a
IS
3850static int fib6_ifup(struct rt6_info *rt, void *p_arg)
3851{
3852 const struct arg_netdev_event *arg = p_arg;
7aef6859 3853 struct net *net = dev_net(arg->dev);
2127d95a 3854
5e670d84
DA
3855 if (rt != net->ipv6.ip6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
3856 rt->fib6_nh.nh_flags &= ~arg->nh_flags;
7aef6859 3857 fib6_update_sernum_upto_root(net, rt);
d7dedee1 3858 rt6_multipath_rebalance(rt);
1de178ed 3859 }
2127d95a
IS
3860
3861 return 0;
3862}
3863
3864void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3865{
3866 struct arg_netdev_event arg = {
3867 .dev = dev,
6802f3ad
IS
3868 {
3869 .nh_flags = nh_flags,
3870 },
2127d95a
IS
3871 };
3872
3873 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3874 arg.nh_flags |= RTNH_F_LINKDOWN;
3875
3876 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3877}
3878
1de178ed
IS
3879static bool rt6_multipath_uses_dev(const struct rt6_info *rt,
3880 const struct net_device *dev)
3881{
3882 struct rt6_info *iter;
3883
5e670d84 3884 if (rt->fib6_nh.nh_dev == dev)
1de178ed
IS
3885 return true;
3886 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
5e670d84 3887 if (iter->fib6_nh.nh_dev == dev)
1de178ed
IS
3888 return true;
3889
3890 return false;
3891}
3892
3893static void rt6_multipath_flush(struct rt6_info *rt)
3894{
3895 struct rt6_info *iter;
3896
3897 rt->should_flush = 1;
3898 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3899 iter->should_flush = 1;
3900}
3901
3902static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
3903 const struct net_device *down_dev)
3904{
3905 struct rt6_info *iter;
3906 unsigned int dead = 0;
3907
5e670d84
DA
3908 if (rt->fib6_nh.nh_dev == down_dev ||
3909 rt->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed
IS
3910 dead++;
3911 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
5e670d84
DA
3912 if (iter->fib6_nh.nh_dev == down_dev ||
3913 iter->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed
IS
3914 dead++;
3915
3916 return dead;
3917}
3918
3919static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
3920 const struct net_device *dev,
3921 unsigned int nh_flags)
3922{
3923 struct rt6_info *iter;
3924
5e670d84
DA
3925 if (rt->fib6_nh.nh_dev == dev)
3926 rt->fib6_nh.nh_flags |= nh_flags;
1de178ed 3927 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
5e670d84
DA
3928 if (iter->fib6_nh.nh_dev == dev)
3929 iter->fib6_nh.nh_flags |= nh_flags;
1de178ed
IS
3930}
3931
a1a22c12 3932/* called with write lock held for table with rt */
4c981e28 3933static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
1da177e4 3934{
4c981e28
IS
3935 const struct arg_netdev_event *arg = p_arg;
3936 const struct net_device *dev = arg->dev;
7aef6859 3937 struct net *net = dev_net(dev);
8ed67789 3938
1de178ed 3939 if (rt == net->ipv6.ip6_null_entry)
27c6fa73
IS
3940 return 0;
3941
3942 switch (arg->event) {
3943 case NETDEV_UNREGISTER:
5e670d84 3944 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
27c6fa73 3945 case NETDEV_DOWN:
1de178ed 3946 if (rt->should_flush)
27c6fa73 3947 return -1;
1de178ed 3948 if (!rt->rt6i_nsiblings)
5e670d84 3949 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
1de178ed
IS
3950 if (rt6_multipath_uses_dev(rt, dev)) {
3951 unsigned int count;
3952
3953 count = rt6_multipath_dead_count(rt, dev);
3954 if (rt->rt6i_nsiblings + 1 == count) {
3955 rt6_multipath_flush(rt);
3956 return -1;
3957 }
3958 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
3959 RTNH_F_LINKDOWN);
7aef6859 3960 fib6_update_sernum(net, rt);
d7dedee1 3961 rt6_multipath_rebalance(rt);
1de178ed
IS
3962 }
3963 return -2;
27c6fa73 3964 case NETDEV_CHANGE:
5e670d84 3965 if (rt->fib6_nh.nh_dev != dev ||
1de178ed 3966 rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73 3967 break;
5e670d84 3968 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 3969 rt6_multipath_rebalance(rt);
27c6fa73 3970 break;
2b241361 3971 }
c159d30c 3972
1da177e4
LT
3973 return 0;
3974}
3975
27c6fa73 3976void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 3977{
4c981e28 3978 struct arg_netdev_event arg = {
8ed67789 3979 .dev = dev,
6802f3ad
IS
3980 {
3981 .event = event,
3982 },
8ed67789
DL
3983 };
3984
4c981e28
IS
3985 fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
3986}
3987
3988void rt6_disable_ip(struct net_device *dev, unsigned long event)
3989{
3990 rt6_sync_down_dev(dev, event);
3991 rt6_uncached_list_flush_dev(dev_net(dev), dev);
3992 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
3993}
3994
/* Argument handed to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
3999
4000static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
4001{
4002 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4003 struct inet6_dev *idev;
4004
4005 /* In IPv6 pmtu discovery is not optional,
4006 so that RTAX_MTU lock cannot disable it.
4007 We still use this lock to block changes
4008 caused by addrconf/ndisc.
4009 */
4010
4011 idev = __in6_dev_get(arg->dev);
38308473 4012 if (!idev)
1da177e4
LT
4013 return 0;
4014
4015 /* For administrative MTU increase, there is no way to discover
4016 IPv6 PMTU increase, so PMTU increase should be updated here.
4017 Since RFC 1981 doesn't include administrative MTU increase
4018 update PMTU increase is a MUST. (i.e. jumbo frame)
4019 */
5e670d84 4020 if (rt->fib6_nh.nh_dev == arg->dev &&
4b32b5ad 4021 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
f5bbe7ee 4022 spin_lock_bh(&rt6_exception_lock);
e9fa1495
SB
4023 if (dst_metric_raw(&rt->dst, RTAX_MTU) &&
4024 rt6_mtu_change_route_allowed(idev, rt, arg->mtu))
4b32b5ad 4025 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
e9fa1495 4026 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
f5bbe7ee 4027 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 4028 }
1da177e4
LT
4029 return 0;
4030}
4031
95c96174 4032void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 4033{
c71099ac
TG
4034 struct rt6_mtu_change_arg arg = {
4035 .dev = dev,
4036 .mtu = mtu,
4037 };
1da177e4 4038
0c3584d5 4039 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
4040}
4041
ef7c79ed 4042static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 4043 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 4044 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 4045 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
4046 [RTA_PRIORITY] = { .type = NLA_U32 },
4047 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 4048 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 4049 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
4050 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4051 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 4052 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 4053 [RTA_UID] = { .type = NLA_U32 },
3b45a410 4054 [RTA_MARK] = { .type = NLA_U32 },
86872cb5
TG
4055};
4056
4057static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
4058 struct fib6_config *cfg,
4059 struct netlink_ext_ack *extack)
1da177e4 4060{
86872cb5
TG
4061 struct rtmsg *rtm;
4062 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 4063 unsigned int pref;
86872cb5 4064 int err;
1da177e4 4065
fceb6435
JB
4066 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4067 NULL);
86872cb5
TG
4068 if (err < 0)
4069 goto errout;
1da177e4 4070
86872cb5
TG
4071 err = -EINVAL;
4072 rtm = nlmsg_data(nlh);
4073 memset(cfg, 0, sizeof(*cfg));
4074
4075 cfg->fc_table = rtm->rtm_table;
4076 cfg->fc_dst_len = rtm->rtm_dst_len;
4077 cfg->fc_src_len = rtm->rtm_src_len;
4078 cfg->fc_flags = RTF_UP;
4079 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 4080 cfg->fc_type = rtm->rtm_type;
86872cb5 4081
ef2c7d7b
ND
4082 if (rtm->rtm_type == RTN_UNREACHABLE ||
4083 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
4084 rtm->rtm_type == RTN_PROHIBIT ||
4085 rtm->rtm_type == RTN_THROW)
86872cb5
TG
4086 cfg->fc_flags |= RTF_REJECT;
4087
ab79ad14
4088 if (rtm->rtm_type == RTN_LOCAL)
4089 cfg->fc_flags |= RTF_LOCAL;
4090
1f56a01f
MKL
4091 if (rtm->rtm_flags & RTM_F_CLONED)
4092 cfg->fc_flags |= RTF_CACHE;
4093
fc1e64e1
DA
4094 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4095
15e47304 4096 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 4097 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 4098 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
4099
4100 if (tb[RTA_GATEWAY]) {
67b61f6c 4101 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 4102 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 4103 }
86872cb5
TG
4104
4105 if (tb[RTA_DST]) {
4106 int plen = (rtm->rtm_dst_len + 7) >> 3;
4107
4108 if (nla_len(tb[RTA_DST]) < plen)
4109 goto errout;
4110
4111 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 4112 }
86872cb5
TG
4113
4114 if (tb[RTA_SRC]) {
4115 int plen = (rtm->rtm_src_len + 7) >> 3;
4116
4117 if (nla_len(tb[RTA_SRC]) < plen)
4118 goto errout;
4119
4120 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 4121 }
86872cb5 4122
c3968a85 4123 if (tb[RTA_PREFSRC])
67b61f6c 4124 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 4125
86872cb5
TG
4126 if (tb[RTA_OIF])
4127 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4128
4129 if (tb[RTA_PRIORITY])
4130 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4131
4132 if (tb[RTA_METRICS]) {
4133 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4134 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 4135 }
86872cb5
TG
4136
4137 if (tb[RTA_TABLE])
4138 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4139
51ebd318
ND
4140 if (tb[RTA_MULTIPATH]) {
4141 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4142 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
4143
4144 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 4145 cfg->fc_mp_len, extack);
9ed59592
DA
4146 if (err < 0)
4147 goto errout;
51ebd318
ND
4148 }
4149
c78ba6d6
LR
4150 if (tb[RTA_PREF]) {
4151 pref = nla_get_u8(tb[RTA_PREF]);
4152 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4153 pref != ICMPV6_ROUTER_PREF_HIGH)
4154 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4155 cfg->fc_flags |= RTF_PREF(pref);
4156 }
4157
19e42e45
RP
4158 if (tb[RTA_ENCAP])
4159 cfg->fc_encap = tb[RTA_ENCAP];
4160
9ed59592 4161 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
4162 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4163
c255bd68 4164 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
4165 if (err < 0)
4166 goto errout;
4167 }
4168
32bc201e
XL
4169 if (tb[RTA_EXPIRES]) {
4170 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4171
4172 if (addrconf_finite_timeout(timeout)) {
4173 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4174 cfg->fc_flags |= RTF_EXPIRES;
4175 }
4176 }
4177
86872cb5
TG
4178 err = 0;
4179errout:
4180 return err;
1da177e4
LT
4181}
4182
6b9ea5a6
RP
4183struct rt6_nh {
4184 struct rt6_info *rt6_info;
4185 struct fib6_config r_cfg;
4186 struct mx6_config mxc;
4187 struct list_head next;
4188};
4189
4190static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4191{
4192 struct rt6_nh *nh;
4193
4194 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 4195 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
4196 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4197 nh->r_cfg.fc_ifindex);
4198 }
4199}
4200
4201static int ip6_route_info_append(struct list_head *rt6_nh_list,
4202 struct rt6_info *rt, struct fib6_config *r_cfg)
4203{
4204 struct rt6_nh *nh;
6b9ea5a6
RP
4205 int err = -EEXIST;
4206
4207 list_for_each_entry(nh, rt6_nh_list, next) {
4208 /* check if rt6_info already exists */
f06b7549 4209 if (rt6_duplicate_nexthop(nh->rt6_info, rt))
6b9ea5a6
RP
4210 return err;
4211 }
4212
4213 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4214 if (!nh)
4215 return -ENOMEM;
4216 nh->rt6_info = rt;
4217 err = ip6_convert_metrics(&nh->mxc, r_cfg);
4218 if (err) {
4219 kfree(nh);
4220 return err;
4221 }
4222 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4223 list_add_tail(&nh->next, rt6_nh_list);
4224
4225 return 0;
4226}
4227
3b1137fe
DA
4228static void ip6_route_mpath_notify(struct rt6_info *rt,
4229 struct rt6_info *rt_last,
4230 struct nl_info *info,
4231 __u16 nlflags)
4232{
4233 /* if this is an APPEND route, then rt points to the first route
4234 * inserted and rt_last points to last route inserted. Userspace
4235 * wants a consistent dump of the route which starts at the first
4236 * nexthop. Since sibling routes are always added at the end of
4237 * the list, find the first sibling of the last route appended
4238 */
4239 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
4240 rt = list_first_entry(&rt_last->rt6i_siblings,
4241 struct rt6_info,
4242 rt6i_siblings);
4243 }
4244
4245 if (rt)
4246 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4247}
4248
333c4301
DA
4249static int ip6_route_multipath_add(struct fib6_config *cfg,
4250 struct netlink_ext_ack *extack)
51ebd318 4251{
3b1137fe
DA
4252 struct rt6_info *rt_notif = NULL, *rt_last = NULL;
4253 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4254 struct fib6_config r_cfg;
4255 struct rtnexthop *rtnh;
6b9ea5a6
RP
4256 struct rt6_info *rt;
4257 struct rt6_nh *err_nh;
4258 struct rt6_nh *nh, *nh_safe;
3b1137fe 4259 __u16 nlflags;
51ebd318
ND
4260 int remaining;
4261 int attrlen;
6b9ea5a6
RP
4262 int err = 1;
4263 int nhn = 0;
4264 int replace = (cfg->fc_nlinfo.nlh &&
4265 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4266 LIST_HEAD(rt6_nh_list);
51ebd318 4267
3b1137fe
DA
4268 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4269 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4270 nlflags |= NLM_F_APPEND;
4271
35f1b4e9 4272 remaining = cfg->fc_mp_len;
51ebd318 4273 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4274
6b9ea5a6
RP
4275 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
4276 * rt6_info structs per nexthop
4277 */
51ebd318
ND
4278 while (rtnh_ok(rtnh, remaining)) {
4279 memcpy(&r_cfg, cfg, sizeof(*cfg));
4280 if (rtnh->rtnh_ifindex)
4281 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4282
4283 attrlen = rtnh_attrlen(rtnh);
4284 if (attrlen > 0) {
4285 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4286
4287 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4288 if (nla) {
67b61f6c 4289 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4290 r_cfg.fc_flags |= RTF_GATEWAY;
4291 }
19e42e45
RP
4292 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4293 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4294 if (nla)
4295 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4296 }
6b9ea5a6 4297
68e2ffde 4298 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
333c4301 4299 rt = ip6_route_info_create(&r_cfg, extack);
8c5b83f0
RP
4300 if (IS_ERR(rt)) {
4301 err = PTR_ERR(rt);
4302 rt = NULL;
6b9ea5a6 4303 goto cleanup;
8c5b83f0 4304 }
6b9ea5a6 4305
5e670d84 4306 rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
398958ae 4307
6b9ea5a6 4308 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
51ebd318 4309 if (err) {
587fea74 4310 dst_release_immediate(&rt->dst);
6b9ea5a6
RP
4311 goto cleanup;
4312 }
4313
4314 rtnh = rtnh_next(rtnh, &remaining);
4315 }
4316
3b1137fe
DA
4317 /* for add and replace send one notification with all nexthops.
4318 * Skip the notification in fib6_add_rt2node and send one with
4319 * the full route when done
4320 */
4321 info->skip_notify = 1;
4322
6b9ea5a6
RP
4323 err_nh = NULL;
4324 list_for_each_entry(nh, &rt6_nh_list, next) {
3b1137fe 4325 rt_last = nh->rt6_info;
333c4301 4326 err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
3b1137fe
DA
4327 /* save reference to first route for notification */
4328 if (!rt_notif && !err)
4329 rt_notif = nh->rt6_info;
4330
6b9ea5a6
RP
4331 /* nh->rt6_info is used or freed at this point, reset to NULL*/
4332 nh->rt6_info = NULL;
4333 if (err) {
4334 if (replace && nhn)
4335 ip6_print_replace_route_err(&rt6_nh_list);
4336 err_nh = nh;
4337 goto add_errout;
51ebd318 4338 }
6b9ea5a6 4339
1a72418b 4340 /* Because each route is added like a single route we remove
27596472
MK
4341 * these flags after the first nexthop: if there is a collision,
4342 * we have already failed to add the first nexthop:
4343 * fib6_add_rt2node() has rejected it; when replacing, old
4344 * nexthops have been replaced by first new, the rest should
4345 * be added to it.
1a72418b 4346 */
27596472
MK
4347 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4348 NLM_F_REPLACE);
6b9ea5a6
RP
4349 nhn++;
4350 }
4351
3b1137fe
DA
4352 /* success ... tell user about new route */
4353 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4354 goto cleanup;
4355
4356add_errout:
3b1137fe
DA
4357 /* send notification for routes that were added so that
4358 * the delete notifications sent by ip6_route_del are
4359 * coherent
4360 */
4361 if (rt_notif)
4362 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4363
6b9ea5a6
RP
4364 /* Delete routes that were already added */
4365 list_for_each_entry(nh, &rt6_nh_list, next) {
4366 if (err_nh == nh)
4367 break;
333c4301 4368 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4369 }
4370
4371cleanup:
4372 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
587fea74
WW
4373 if (nh->rt6_info)
4374 dst_release_immediate(&nh->rt6_info->dst);
52fe51f8 4375 kfree(nh->mxc.mx);
6b9ea5a6
RP
4376 list_del(&nh->next);
4377 kfree(nh);
4378 }
4379
4380 return err;
4381}
4382
333c4301
DA
4383static int ip6_route_multipath_del(struct fib6_config *cfg,
4384 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4385{
4386 struct fib6_config r_cfg;
4387 struct rtnexthop *rtnh;
4388 int remaining;
4389 int attrlen;
4390 int err = 1, last_err = 0;
4391
4392 remaining = cfg->fc_mp_len;
4393 rtnh = (struct rtnexthop *)cfg->fc_mp;
4394
4395 /* Parse a Multipath Entry */
4396 while (rtnh_ok(rtnh, remaining)) {
4397 memcpy(&r_cfg, cfg, sizeof(*cfg));
4398 if (rtnh->rtnh_ifindex)
4399 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4400
4401 attrlen = rtnh_attrlen(rtnh);
4402 if (attrlen > 0) {
4403 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4404
4405 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4406 if (nla) {
4407 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4408 r_cfg.fc_flags |= RTF_GATEWAY;
4409 }
4410 }
333c4301 4411 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4412 if (err)
4413 last_err = err;
4414
51ebd318
ND
4415 rtnh = rtnh_next(rtnh, &remaining);
4416 }
4417
4418 return last_err;
4419}
4420
c21ef3e3
DA
4421static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4422 struct netlink_ext_ack *extack)
1da177e4 4423{
86872cb5
TG
4424 struct fib6_config cfg;
4425 int err;
1da177e4 4426
333c4301 4427 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4428 if (err < 0)
4429 return err;
4430
51ebd318 4431 if (cfg.fc_mp)
333c4301 4432 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4433 else {
4434 cfg.fc_delete_all_nh = 1;
333c4301 4435 return ip6_route_del(&cfg, extack);
0ae81335 4436 }
1da177e4
LT
4437}
4438
c21ef3e3
DA
4439static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4440 struct netlink_ext_ack *extack)
1da177e4 4441{
86872cb5
TG
4442 struct fib6_config cfg;
4443 int err;
1da177e4 4444
333c4301 4445 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4446 if (err < 0)
4447 return err;
4448
51ebd318 4449 if (cfg.fc_mp)
333c4301 4450 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4451 else
333c4301 4452 return ip6_route_add(&cfg, extack);
1da177e4
LT
4453}
4454
beb1afac 4455static size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f 4456{
beb1afac
DA
4457 int nexthop_len = 0;
4458
4459 if (rt->rt6i_nsiblings) {
4460 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4461 + NLA_ALIGN(sizeof(struct rtnexthop))
4462 + nla_total_size(16) /* RTA_GATEWAY */
5e670d84 4463 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
beb1afac
DA
4464
4465 nexthop_len *= rt->rt6i_nsiblings;
4466 }
4467
339bf98f
TG
4468 return NLMSG_ALIGN(sizeof(struct rtmsg))
4469 + nla_total_size(16) /* RTA_SRC */
4470 + nla_total_size(16) /* RTA_DST */
4471 + nla_total_size(16) /* RTA_GATEWAY */
4472 + nla_total_size(16) /* RTA_PREFSRC */
4473 + nla_total_size(4) /* RTA_TABLE */
4474 + nla_total_size(4) /* RTA_IIF */
4475 + nla_total_size(4) /* RTA_OIF */
4476 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4477 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4478 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4479 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4480 + nla_total_size(1) /* RTA_PREF */
5e670d84 4481 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
beb1afac
DA
4482 + nexthop_len;
4483}
4484
4485static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
5be083ce 4486 unsigned int *flags, bool skip_oif)
beb1afac 4487{
5e670d84 4488 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
f9d882ea
IS
4489 *flags |= RTNH_F_DEAD;
4490
5e670d84 4491 if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
beb1afac
DA
4492 *flags |= RTNH_F_LINKDOWN;
4493 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
4494 *flags |= RTNH_F_DEAD;
4495 }
4496
4497 if (rt->rt6i_flags & RTF_GATEWAY) {
5e670d84 4498 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
beb1afac
DA
4499 goto nla_put_failure;
4500 }
4501
5e670d84
DA
4502 *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
4503 if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
4504 *flags |= RTNH_F_OFFLOAD;
4505
5be083ce 4506 /* not needed for multipath encoding b/c it has a rtnexthop struct */
5e670d84
DA
4507 if (!skip_oif && rt->fib6_nh.nh_dev &&
4508 nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
beb1afac
DA
4509 goto nla_put_failure;
4510
5e670d84
DA
4511 if (rt->fib6_nh.nh_lwtstate &&
4512 lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
beb1afac
DA
4513 goto nla_put_failure;
4514
4515 return 0;
4516
4517nla_put_failure:
4518 return -EMSGSIZE;
4519}
4520
5be083ce 4521/* add multipath next hop */
beb1afac
DA
4522static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
4523{
5e670d84 4524 const struct net_device *dev = rt->fib6_nh.nh_dev;
beb1afac
DA
4525 struct rtnexthop *rtnh;
4526 unsigned int flags = 0;
4527
4528 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4529 if (!rtnh)
4530 goto nla_put_failure;
4531
5e670d84
DA
4532 rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
4533 rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
beb1afac 4534
5be083ce 4535 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
4536 goto nla_put_failure;
4537
4538 rtnh->rtnh_flags = flags;
4539
4540 /* length of rtnetlink header + attributes */
4541 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4542
4543 return 0;
4544
4545nla_put_failure:
4546 return -EMSGSIZE;
339bf98f
TG
4547}
4548
191cd582
BH
4549static int rt6_fill_node(struct net *net,
4550 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 4551 struct in6_addr *dst, struct in6_addr *src,
15e47304 4552 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4553 unsigned int flags)
1da177e4 4554{
4b32b5ad 4555 u32 metrics[RTAX_MAX];
1da177e4 4556 struct rtmsg *rtm;
2d7202bf 4557 struct nlmsghdr *nlh;
e3703b3d 4558 long expires;
9e762a4a 4559 u32 table;
1da177e4 4560
15e47304 4561 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4562 if (!nlh)
26932566 4563 return -EMSGSIZE;
2d7202bf
TG
4564
4565 rtm = nlmsg_data(nlh);
1da177e4
LT
4566 rtm->rtm_family = AF_INET6;
4567 rtm->rtm_dst_len = rt->rt6i_dst.plen;
4568 rtm->rtm_src_len = rt->rt6i_src.plen;
4569 rtm->rtm_tos = 0;
c71099ac 4570 if (rt->rt6i_table)
9e762a4a 4571 table = rt->rt6i_table->tb6_id;
c71099ac 4572 else
9e762a4a
PM
4573 table = RT6_TABLE_UNSPEC;
4574 rtm->rtm_table = table;
c78679e8
DM
4575 if (nla_put_u32(skb, RTA_TABLE, table))
4576 goto nla_put_failure;
e8478e80
DA
4577
4578 rtm->rtm_type = rt->fib6_type;
1da177e4
LT
4579 rtm->rtm_flags = 0;
4580 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4581 rtm->rtm_protocol = rt->rt6i_protocol;
1da177e4 4582
38308473 4583 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
4584 rtm->rtm_flags |= RTM_F_CLONED;
4585
4586 if (dst) {
930345ea 4587 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 4588 goto nla_put_failure;
1ab1457c 4589 rtm->rtm_dst_len = 128;
1da177e4 4590 } else if (rtm->rtm_dst_len)
930345ea 4591 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 4592 goto nla_put_failure;
1da177e4
LT
4593#ifdef CONFIG_IPV6_SUBTREES
4594 if (src) {
930345ea 4595 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4596 goto nla_put_failure;
1ab1457c 4597 rtm->rtm_src_len = 128;
c78679e8 4598 } else if (rtm->rtm_src_len &&
930345ea 4599 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 4600 goto nla_put_failure;
1da177e4 4601#endif
7bc570c8
YH
4602 if (iif) {
4603#ifdef CONFIG_IPV6_MROUTE
4604 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
fd61c6ba
DA
4605 int err = ip6mr_get_route(net, skb, rtm, portid);
4606
4607 if (err == 0)
4608 return 0;
4609 if (err < 0)
4610 goto nla_put_failure;
7bc570c8
YH
4611 } else
4612#endif
c78679e8
DM
4613 if (nla_put_u32(skb, RTA_IIF, iif))
4614 goto nla_put_failure;
7bc570c8 4615 } else if (dst) {
1da177e4 4616 struct in6_addr saddr_buf;
c78679e8 4617 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 4618 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4619 goto nla_put_failure;
1da177e4 4620 }
2d7202bf 4621
c3968a85
DW
4622 if (rt->rt6i_prefsrc.plen) {
4623 struct in6_addr saddr_buf;
4e3fd7a0 4624 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 4625 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4626 goto nla_put_failure;
c3968a85
DW
4627 }
4628
4b32b5ad
MKL
4629 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
4630 if (rt->rt6i_pmtu)
4631 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
4632 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
4633 goto nla_put_failure;
4634
c78679e8
DM
4635 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
4636 goto nla_put_failure;
8253947e 4637
beb1afac
DA
4638 /* For multipath routes, walk the siblings list and add
4639 * each as a nexthop within RTA_MULTIPATH.
4640 */
4641 if (rt->rt6i_nsiblings) {
4642 struct rt6_info *sibling, *next_sibling;
4643 struct nlattr *mp;
4644
4645 mp = nla_nest_start(skb, RTA_MULTIPATH);
4646 if (!mp)
4647 goto nla_put_failure;
4648
4649 if (rt6_add_nexthop(skb, rt) < 0)
4650 goto nla_put_failure;
4651
4652 list_for_each_entry_safe(sibling, next_sibling,
4653 &rt->rt6i_siblings, rt6i_siblings) {
4654 if (rt6_add_nexthop(skb, sibling) < 0)
4655 goto nla_put_failure;
4656 }
4657
4658 nla_nest_end(skb, mp);
4659 } else {
5be083ce 4660 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
4661 goto nla_put_failure;
4662 }
4663
8253947e 4664 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 4665
87a50699 4666 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 4667 goto nla_put_failure;
2d7202bf 4668
c78ba6d6
LR
4669 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
4670 goto nla_put_failure;
4671
19e42e45 4672
053c095a
JB
4673 nlmsg_end(skb, nlh);
4674 return 0;
2d7202bf
TG
4675
4676nla_put_failure:
26932566
PM
4677 nlmsg_cancel(skb, nlh);
4678 return -EMSGSIZE;
1da177e4
LT
4679}
4680
1b43af54 4681int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
4682{
4683 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1f17e2f2
DA
4684 struct net *net = arg->net;
4685
4686 if (rt == net->ipv6.ip6_null_entry)
4687 return 0;
1da177e4 4688
2d7202bf
TG
4689 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4690 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
f8cfe2ce
DA
4691
4692 /* user wants prefix routes only */
4693 if (rtm->rtm_flags & RTM_F_PREFIX &&
4694 !(rt->rt6i_flags & RTF_PREFIX_RT)) {
4695 /* success since this is not a prefix route */
4696 return 1;
4697 }
4698 }
1da177e4 4699
1f17e2f2 4700 return rt6_fill_node(net,
191cd582 4701 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 4702 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
f8cfe2ce 4703 NLM_F_MULTI);
1da177e4
LT
4704}
4705
c21ef3e3
DA
4706static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4707 struct netlink_ext_ack *extack)
1da177e4 4708{
3b1e0a65 4709 struct net *net = sock_net(in_skb->sk);
ab364a6f 4710 struct nlattr *tb[RTA_MAX+1];
18c3a61c
RP
4711 int err, iif = 0, oif = 0;
4712 struct dst_entry *dst;
ab364a6f 4713 struct rt6_info *rt;
1da177e4 4714 struct sk_buff *skb;
ab364a6f 4715 struct rtmsg *rtm;
4c9483b2 4716 struct flowi6 fl6;
18c3a61c 4717 bool fibmatch;
1da177e4 4718
fceb6435 4719 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 4720 extack);
ab364a6f
TG
4721 if (err < 0)
4722 goto errout;
1da177e4 4723
ab364a6f 4724 err = -EINVAL;
4c9483b2 4725 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
4726 rtm = nlmsg_data(nlh);
4727 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4728 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4729
ab364a6f
TG
4730 if (tb[RTA_SRC]) {
4731 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4732 goto errout;
4733
4e3fd7a0 4734 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4735 }
4736
4737 if (tb[RTA_DST]) {
4738 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4739 goto errout;
4740
4e3fd7a0 4741 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4742 }
4743
4744 if (tb[RTA_IIF])
4745 iif = nla_get_u32(tb[RTA_IIF]);
4746
4747 if (tb[RTA_OIF])
72331bc0 4748 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4749
2e47b291
LC
4750 if (tb[RTA_MARK])
4751 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4752
622ec2c9
LC
4753 if (tb[RTA_UID])
4754 fl6.flowi6_uid = make_kuid(current_user_ns(),
4755 nla_get_u32(tb[RTA_UID]));
4756 else
4757 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4758
1da177e4
LT
4759 if (iif) {
4760 struct net_device *dev;
72331bc0
SL
4761 int flags = 0;
4762
121622db
FW
4763 rcu_read_lock();
4764
4765 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4766 if (!dev) {
121622db 4767 rcu_read_unlock();
1da177e4 4768 err = -ENODEV;
ab364a6f 4769 goto errout;
1da177e4 4770 }
72331bc0
SL
4771
4772 fl6.flowi6_iif = iif;
4773
4774 if (!ipv6_addr_any(&fl6.saddr))
4775 flags |= RT6_LOOKUP_F_HAS_SADDR;
4776
b75cc8f9 4777 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
121622db
FW
4778
4779 rcu_read_unlock();
72331bc0
SL
4780 } else {
4781 fl6.flowi6_oif = oif;
4782
58acfd71 4783 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
4784 }
4785
18c3a61c
RP
4786
4787 rt = container_of(dst, struct rt6_info, dst);
4788 if (rt->dst.error) {
4789 err = rt->dst.error;
4790 ip6_rt_put(rt);
4791 goto errout;
1da177e4
LT
4792 }
4793
9d6acb3b
WC
4794 if (rt == net->ipv6.ip6_null_entry) {
4795 err = rt->dst.error;
4796 ip6_rt_put(rt);
4797 goto errout;
4798 }
4799
fba961ab
DM
4800 if (fibmatch && rt->from) {
4801 struct rt6_info *ort = rt->from;
58acfd71
IS
4802
4803 dst_hold(&ort->dst);
4804 ip6_rt_put(rt);
4805 rt = ort;
4806 }
4807
ab364a6f 4808 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4809 if (!skb) {
94e187c0 4810 ip6_rt_put(rt);
ab364a6f
TG
4811 err = -ENOBUFS;
4812 goto errout;
4813 }
1da177e4 4814
d8d1f30b 4815 skb_dst_set(skb, &rt->dst);
18c3a61c
RP
4816 if (fibmatch)
4817 err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
4818 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4819 nlh->nlmsg_seq, 0);
4820 else
4821 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
4822 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4823 nlh->nlmsg_seq, 0);
1da177e4 4824 if (err < 0) {
ab364a6f
TG
4825 kfree_skb(skb);
4826 goto errout;
1da177e4
LT
4827 }
4828
15e47304 4829 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 4830errout:
1da177e4 4831 return err;
1da177e4
LT
4832}
4833
37a1d361
RP
4834void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
4835 unsigned int nlm_flags)
1da177e4
LT
4836{
4837 struct sk_buff *skb;
5578689a 4838 struct net *net = info->nl_net;
528c4ceb
DL
4839 u32 seq;
4840 int err;
4841
4842 err = -ENOBUFS;
38308473 4843 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 4844
19e42e45 4845 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 4846 if (!skb)
21713ebc
TG
4847 goto errout;
4848
191cd582 4849 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
f8cfe2ce 4850 event, info->portid, seq, nlm_flags);
26932566
PM
4851 if (err < 0) {
4852 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4853 WARN_ON(err == -EMSGSIZE);
4854 kfree_skb(skb);
4855 goto errout;
4856 }
15e47304 4857 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
4858 info->nlh, gfp_any());
4859 return;
21713ebc
TG
4860errout:
4861 if (err < 0)
5578689a 4862 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
4863}
4864
8ed67789 4865static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 4866 unsigned long event, void *ptr)
8ed67789 4867{
351638e7 4868 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 4869 struct net *net = dev_net(dev);
8ed67789 4870
242d3a49
WC
4871 if (!(dev->flags & IFF_LOOPBACK))
4872 return NOTIFY_OK;
4873
4874 if (event == NETDEV_REGISTER) {
d8d1f30b 4875 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
4876 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
4877#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 4878 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 4879 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 4880 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 4881 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 4882#endif
76da0704
WC
4883 } else if (event == NETDEV_UNREGISTER &&
4884 dev->reg_state != NETREG_UNREGISTERED) {
4885 /* NETDEV_UNREGISTER could be fired for multiple times by
4886 * netdev_wait_allrefs(). Make sure we only call this once.
4887 */
12d94a80 4888 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 4889#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
4890 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
4891 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
4892#endif
4893 }
4894
4895 return NOTIFY_OK;
4896}
4897
1da177e4
LT
4898/*
4899 * /proc
4900 */
4901
4902#ifdef CONFIG_PROC_FS
4903
33120b30 4904static const struct file_operations ipv6_route_proc_fops = {
33120b30
AD
4905 .open = ipv6_route_open,
4906 .read = seq_read,
4907 .llseek = seq_lseek,
8d2ca1d7 4908 .release = seq_release_net,
33120b30
AD
4909};
4910
1da177e4
LT
4911static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4912{
69ddb805 4913 struct net *net = (struct net *)seq->private;
1da177e4 4914 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
4915 net->ipv6.rt6_stats->fib_nodes,
4916 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 4917 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
4918 net->ipv6.rt6_stats->fib_rt_entries,
4919 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 4920 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 4921 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
4922
4923 return 0;
4924}
4925
4926static int rt6_stats_seq_open(struct inode *inode, struct file *file)
4927{
de05c557 4928 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
4929}
4930
9a32144e 4931static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
4932 .open = rt6_stats_seq_open,
4933 .read = seq_read,
4934 .llseek = seq_lseek,
b6fcbdb4 4935 .release = single_release_net,
1da177e4
LT
4936};
4937#endif /* CONFIG_PROC_FS */
4938
4939#ifdef CONFIG_SYSCTL
4940
1da177e4 4941static
fe2c6338 4942int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
4943 void __user *buffer, size_t *lenp, loff_t *ppos)
4944{
c486da34
LAG
4945 struct net *net;
4946 int delay;
4947 if (!write)
1da177e4 4948 return -EINVAL;
c486da34
LAG
4949
4950 net = (struct net *)ctl->extra1;
4951 delay = net->ipv6.sysctl.flush_delay;
4952 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 4953 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 4954 return 0;
1da177e4
LT
4955}
4956
fe2c6338 4957struct ctl_table ipv6_route_table_template[] = {
1ab1457c 4958 {
1da177e4 4959 .procname = "flush",
4990509f 4960 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 4961 .maxlen = sizeof(int),
89c8b3a1 4962 .mode = 0200,
6d9f239a 4963 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
4964 },
4965 {
1da177e4 4966 .procname = "gc_thresh",
9a7ec3a9 4967 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
4968 .maxlen = sizeof(int),
4969 .mode = 0644,
6d9f239a 4970 .proc_handler = proc_dointvec,
1da177e4
LT
4971 },
4972 {
1da177e4 4973 .procname = "max_size",
4990509f 4974 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
4975 .maxlen = sizeof(int),
4976 .mode = 0644,
6d9f239a 4977 .proc_handler = proc_dointvec,
1da177e4
LT
4978 },
4979 {
1da177e4 4980 .procname = "gc_min_interval",
4990509f 4981 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
4982 .maxlen = sizeof(int),
4983 .mode = 0644,
6d9f239a 4984 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4985 },
4986 {
1da177e4 4987 .procname = "gc_timeout",
4990509f 4988 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
4989 .maxlen = sizeof(int),
4990 .mode = 0644,
6d9f239a 4991 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4992 },
4993 {
1da177e4 4994 .procname = "gc_interval",
4990509f 4995 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
4996 .maxlen = sizeof(int),
4997 .mode = 0644,
6d9f239a 4998 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4999 },
5000 {
1da177e4 5001 .procname = "gc_elasticity",
4990509f 5002 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
5003 .maxlen = sizeof(int),
5004 .mode = 0644,
f3d3f616 5005 .proc_handler = proc_dointvec,
1da177e4
LT
5006 },
5007 {
1da177e4 5008 .procname = "mtu_expires",
4990509f 5009 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
5010 .maxlen = sizeof(int),
5011 .mode = 0644,
6d9f239a 5012 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5013 },
5014 {
1da177e4 5015 .procname = "min_adv_mss",
4990509f 5016 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
5017 .maxlen = sizeof(int),
5018 .mode = 0644,
f3d3f616 5019 .proc_handler = proc_dointvec,
1da177e4
LT
5020 },
5021 {
1da177e4 5022 .procname = "gc_min_interval_ms",
4990509f 5023 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5024 .maxlen = sizeof(int),
5025 .mode = 0644,
6d9f239a 5026 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 5027 },
f8572d8f 5028 { }
1da177e4
LT
5029};
5030
2c8c1e72 5031struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
5032{
5033 struct ctl_table *table;
5034
5035 table = kmemdup(ipv6_route_table_template,
5036 sizeof(ipv6_route_table_template),
5037 GFP_KERNEL);
5ee09105
YH
5038
5039 if (table) {
5040 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 5041 table[0].extra1 = net;
86393e52 5042 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
5043 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5044 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5045 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5046 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5047 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5048 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5049 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 5050 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
5051
5052 /* Don't export sysctls to unprivileged users */
5053 if (net->user_ns != &init_user_ns)
5054 table[0].procname = NULL;
5ee09105
YH
5055 }
5056
760f2d01
DL
5057 return table;
5058}
1da177e4
LT
5059#endif
5060
2c8c1e72 5061static int __net_init ip6_route_net_init(struct net *net)
cdb18761 5062{
633d424b 5063 int ret = -ENOMEM;
8ed67789 5064
86393e52
AD
5065 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5066 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 5067
fc66f95c
ED
5068 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5069 goto out_ip6_dst_ops;
5070
8ed67789
DL
5071 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5072 sizeof(*net->ipv6.ip6_null_entry),
5073 GFP_KERNEL);
5074 if (!net->ipv6.ip6_null_entry)
fc66f95c 5075 goto out_ip6_dst_entries;
d8d1f30b 5076 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5077 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5078 ip6_template_metrics, true);
8ed67789
DL
5079
5080#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 5081 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
5082 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5083 sizeof(*net->ipv6.ip6_prohibit_entry),
5084 GFP_KERNEL);
68fffc67
PZ
5085 if (!net->ipv6.ip6_prohibit_entry)
5086 goto out_ip6_null_entry;
d8d1f30b 5087 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5088 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5089 ip6_template_metrics, true);
8ed67789
DL
5090
5091 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5092 sizeof(*net->ipv6.ip6_blk_hole_entry),
5093 GFP_KERNEL);
68fffc67
PZ
5094 if (!net->ipv6.ip6_blk_hole_entry)
5095 goto out_ip6_prohibit_entry;
d8d1f30b 5096 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5097 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5098 ip6_template_metrics, true);
8ed67789
DL
5099#endif
5100
b339a47c
PZ
5101 net->ipv6.sysctl.flush_delay = 0;
5102 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5103 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5104 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5105 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5106 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5107 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5108 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5109
6891a346
BT
5110 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5111
8ed67789
DL
5112 ret = 0;
5113out:
5114 return ret;
f2fc6a54 5115
68fffc67
PZ
5116#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5117out_ip6_prohibit_entry:
5118 kfree(net->ipv6.ip6_prohibit_entry);
5119out_ip6_null_entry:
5120 kfree(net->ipv6.ip6_null_entry);
5121#endif
fc66f95c
ED
5122out_ip6_dst_entries:
5123 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 5124out_ip6_dst_ops:
f2fc6a54 5125 goto out;
cdb18761
DL
5126}
5127
2c8c1e72 5128static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 5129{
8ed67789
DL
5130 kfree(net->ipv6.ip6_null_entry);
5131#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5132 kfree(net->ipv6.ip6_prohibit_entry);
5133 kfree(net->ipv6.ip6_blk_hole_entry);
5134#endif
41bb78b4 5135 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
5136}
5137
d189634e
TG
5138static int __net_init ip6_route_net_init_late(struct net *net)
5139{
5140#ifdef CONFIG_PROC_FS
d4beaa66 5141 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
d6444062 5142 proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
5143#endif
5144 return 0;
5145}
5146
5147static void __net_exit ip6_route_net_exit_late(struct net *net)
5148{
5149#ifdef CONFIG_PROC_FS
ece31ffd
G
5150 remove_proc_entry("ipv6_route", net->proc_net);
5151 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
5152#endif
5153}
5154
cdb18761
DL
5155static struct pernet_operations ip6_route_net_ops = {
5156 .init = ip6_route_net_init,
5157 .exit = ip6_route_net_exit,
5158};
5159
c3426b47
DM
5160static int __net_init ipv6_inetpeer_init(struct net *net)
5161{
5162 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5163
5164 if (!bp)
5165 return -ENOMEM;
5166 inet_peer_base_init(bp);
5167 net->ipv6.peers = bp;
5168 return 0;
5169}
5170
5171static void __net_exit ipv6_inetpeer_exit(struct net *net)
5172{
5173 struct inet_peer_base *bp = net->ipv6.peers;
5174
5175 net->ipv6.peers = NULL;
56a6b248 5176 inetpeer_invalidate_tree(bp);
c3426b47
DM
5177 kfree(bp);
5178}
5179
2b823f72 5180static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
5181 .init = ipv6_inetpeer_init,
5182 .exit = ipv6_inetpeer_exit,
5183};
5184
d189634e
TG
5185static struct pernet_operations ip6_route_net_late_ops = {
5186 .init = ip6_route_net_init_late,
5187 .exit = ip6_route_net_exit_late,
5188};
5189
8ed67789
DL
5190static struct notifier_block ip6_route_dev_notifier = {
5191 .notifier_call = ip6_route_dev_notify,
242d3a49 5192 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5193};
5194
2f460933
WC
5195void __init ip6_route_init_special_entries(void)
5196{
5197 /* Registering of the loopback is done before this portion of code,
5198 * the loopback reference in rt6_info will not be taken, do it
5199 * manually for init_net */
5200 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5201 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5202 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5203 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5204 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5205 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5206 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5207 #endif
5208}
5209
433d49c3 5210int __init ip6_route_init(void)
1da177e4 5211{
433d49c3 5212 int ret;
8d0b94af 5213 int cpu;
433d49c3 5214
9a7ec3a9
DL
5215 ret = -ENOMEM;
5216 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5217 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5218 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5219 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5220 goto out;
14e50e57 5221
fc66f95c 5222 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5223 if (ret)
bdb3289f 5224 goto out_kmem_cache;
bdb3289f 5225
c3426b47
DM
5226 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5227 if (ret)
e8803b6c 5228 goto out_dst_entries;
2a0c451a 5229
7e52b33b
DM
5230 ret = register_pernet_subsys(&ip6_route_net_ops);
5231 if (ret)
5232 goto out_register_inetpeer;
c3426b47 5233
5dc121e9
AE
5234 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5235
e8803b6c 5236 ret = fib6_init();
433d49c3 5237 if (ret)
8ed67789 5238 goto out_register_subsys;
433d49c3 5239
433d49c3
DL
5240 ret = xfrm6_init();
5241 if (ret)
e8803b6c 5242 goto out_fib6_init;
c35b7e72 5243
433d49c3
DL
5244 ret = fib6_rules_init();
5245 if (ret)
5246 goto xfrm6_init;
7e5449c2 5247
d189634e
TG
5248 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5249 if (ret)
5250 goto fib6_rules_init;
5251
16feebcf
FW
5252 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5253 inet6_rtm_newroute, NULL, 0);
5254 if (ret < 0)
5255 goto out_register_late_subsys;
5256
5257 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5258 inet6_rtm_delroute, NULL, 0);
5259 if (ret < 0)
5260 goto out_register_late_subsys;
5261
5262 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5263 inet6_rtm_getroute, NULL,
5264 RTNL_FLAG_DOIT_UNLOCKED);
5265 if (ret < 0)
d189634e 5266 goto out_register_late_subsys;
c127ea2c 5267
8ed67789 5268 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5269 if (ret)
d189634e 5270 goto out_register_late_subsys;
8ed67789 5271
8d0b94af
MKL
5272 for_each_possible_cpu(cpu) {
5273 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5274
5275 INIT_LIST_HEAD(&ul->head);
5276 spin_lock_init(&ul->lock);
5277 }
5278
433d49c3
DL
5279out:
5280 return ret;
5281
d189634e 5282out_register_late_subsys:
16feebcf 5283 rtnl_unregister_all(PF_INET6);
d189634e 5284 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5285fib6_rules_init:
433d49c3
DL
5286 fib6_rules_cleanup();
5287xfrm6_init:
433d49c3 5288 xfrm6_fini();
2a0c451a
TG
5289out_fib6_init:
5290 fib6_gc_cleanup();
8ed67789
DL
5291out_register_subsys:
5292 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5293out_register_inetpeer:
5294 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5295out_dst_entries:
5296 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5297out_kmem_cache:
f2fc6a54 5298 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5299 goto out;
1da177e4
LT
5300}
5301
5302void ip6_route_cleanup(void)
5303{
8ed67789 5304 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5305 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5306 fib6_rules_cleanup();
1da177e4 5307 xfrm6_fini();
1da177e4 5308 fib6_gc_cleanup();
c3426b47 5309 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5310 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5311 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5312 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5313}