]> git.ipfire.org Git - thirdparty/kernel/stable.git/blame - net/ipv6/route.c
xfrm_user: uncoditionally validate esn replay attribute struct
[thirdparty/kernel/stable.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
b811580d 66#include <trace/events/fib6.h>
1da177e4 67
7c0f6ba6 68#include <linux/uaccess.h>
1da177e4
LT
69
70#ifdef CONFIG_SYSCTL
71#include <linux/sysctl.h>
72#endif
73
/*
 * Result of the neighbour reachability check done while scoring a route.
 * Negative values are failures; rt6_score_route() hands them straight back
 * to its caller.
 */
enum rt6_nud_state {
	/* Route must not be used; find_match() skips it. */
	RT6_NUD_FAIL_HARD	= -3,
	/* Neighbour entry is in NUD_FAILED (CONFIG_IPV6_ROUTER_PREF only). */
	RT6_NUD_FAIL_PROBE	= -2,
	/* No neighbour entry; find_match() scores it 0 and asks for round-robin. */
	RT6_NUD_FAIL_DO_RR	= -1,
	/* Next hop is usable, or no neighbour check is needed. */
	RT6_NUD_SUCCEED		= 1
};
80
83a09abd 81static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
1da177e4 82static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 83static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 84static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
85static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86static void ip6_dst_destroy(struct dst_entry *);
87static void ip6_dst_ifdown(struct dst_entry *,
88 struct net_device *dev, int how);
569d3645 89static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
90
91static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 92static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 93static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 94static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 95static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
96static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
97 struct sk_buff *skb, u32 mtu);
98static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
99 struct sk_buff *skb);
4b32b5ad 100static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 101static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
16a16cd3
DA
102static size_t rt6_nlmsg_size(struct rt6_info *rt);
103static int rt6_fill_node(struct net *net,
104 struct sk_buff *skb, struct rt6_info *rt,
105 struct in6_addr *dst, struct in6_addr *src,
106 int iif, int type, u32 portid, u32 seq,
107 unsigned int flags);
35732d01
WW
108static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
109 struct in6_addr *daddr,
110 struct in6_addr *saddr);
1da177e4 111
70ceb4f5 112#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 113static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 114 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
115 const struct in6_addr *gwaddr,
116 struct net_device *dev,
95c96174 117 unsigned int pref);
efa2cea0 118static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 119 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
120 const struct in6_addr *gwaddr,
121 struct net_device *dev);
70ceb4f5
YH
122#endif
123
8d0b94af
MKL
124struct uncached_list {
125 spinlock_t lock;
126 struct list_head head;
127};
128
129static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
130
131static void rt6_uncached_list_add(struct rt6_info *rt)
132{
133 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
134
8d0b94af
MKL
135 rt->rt6i_uncached_list = ul;
136
137 spin_lock_bh(&ul->lock);
138 list_add_tail(&rt->rt6i_uncached, &ul->head);
139 spin_unlock_bh(&ul->lock);
140}
141
142static void rt6_uncached_list_del(struct rt6_info *rt)
143{
144 if (!list_empty(&rt->rt6i_uncached)) {
145 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 146 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
147
148 spin_lock_bh(&ul->lock);
149 list_del(&rt->rt6i_uncached);
81eb8447 150 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
151 spin_unlock_bh(&ul->lock);
152 }
153}
154
155static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
156{
157 struct net_device *loopback_dev = net->loopback_dev;
158 int cpu;
159
e332bc67
EB
160 if (dev == loopback_dev)
161 return;
162
8d0b94af
MKL
163 for_each_possible_cpu(cpu) {
164 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
165 struct rt6_info *rt;
166
167 spin_lock_bh(&ul->lock);
168 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
169 struct inet6_dev *rt_idev = rt->rt6i_idev;
170 struct net_device *rt_dev = rt->dst.dev;
171
e332bc67 172 if (rt_idev->dev == dev) {
8d0b94af
MKL
173 rt->rt6i_idev = in6_dev_get(loopback_dev);
174 in6_dev_put(rt_idev);
175 }
176
e332bc67 177 if (rt_dev == dev) {
8d0b94af
MKL
178 rt->dst.dev = loopback_dev;
179 dev_hold(rt->dst.dev);
180 dev_put(rt_dev);
181 }
182 }
183 spin_unlock_bh(&ul->lock);
184 }
185}
186
d52d3997
MKL
187static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
188{
3a2232e9 189 return dst_metrics_write_ptr(&rt->from->dst);
d52d3997
MKL
190}
191
06582540
DM
192static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
193{
4b32b5ad 194 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 195
d52d3997
MKL
196 if (rt->rt6i_flags & RTF_PCPU)
197 return rt6_pcpu_cow_metrics(rt);
198 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
199 return NULL;
200 else
3b471175 201 return dst_cow_metrics_generic(dst, old);
06582540
DM
202}
203
f894cbf8
DM
204static inline const void *choose_neigh_daddr(struct rt6_info *rt,
205 struct sk_buff *skb,
206 const void *daddr)
39232973
DM
207{
208 struct in6_addr *p = &rt->rt6i_gateway;
209
a7563f34 210 if (!ipv6_addr_any(p))
39232973 211 return (const void *) p;
f894cbf8
DM
212 else if (skb)
213 return &ipv6_hdr(skb)->daddr;
39232973
DM
214 return daddr;
215}
216
f894cbf8
DM
217static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
218 struct sk_buff *skb,
219 const void *daddr)
d3aaeb38 220{
39232973
DM
221 struct rt6_info *rt = (struct rt6_info *) dst;
222 struct neighbour *n;
223
f894cbf8 224 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 225 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
226 if (n)
227 return n;
228 return neigh_create(&nd_tbl, daddr, dst->dev);
229}
230
63fca65d
JA
231static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
232{
233 struct net_device *dev = dst->dev;
234 struct rt6_info *rt = (struct rt6_info *)dst;
235
236 daddr = choose_neigh_daddr(rt, NULL, daddr);
237 if (!daddr)
238 return;
239 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
240 return;
241 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
242 return;
243 __ipv6_confirm_neigh(dev, daddr);
244}
245
9a7ec3a9 246static struct dst_ops ip6_dst_ops_template = {
1da177e4 247 .family = AF_INET6,
1da177e4
LT
248 .gc = ip6_dst_gc,
249 .gc_thresh = 1024,
250 .check = ip6_dst_check,
0dbaee3b 251 .default_advmss = ip6_default_advmss,
ebb762f2 252 .mtu = ip6_mtu,
06582540 253 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
254 .destroy = ip6_dst_destroy,
255 .ifdown = ip6_dst_ifdown,
256 .negative_advice = ip6_negative_advice,
257 .link_failure = ip6_link_failure,
258 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 259 .redirect = rt6_do_redirect,
9f8955cc 260 .local_out = __ip6_local_out,
d3aaeb38 261 .neigh_lookup = ip6_neigh_lookup,
63fca65d 262 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
263};
264
ebb762f2 265static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 266{
618f9bc7
SK
267 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
268
269 return mtu ? : dst->dev->mtu;
ec831ea7
RD
270}
271
6700c270
DM
272static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
273 struct sk_buff *skb, u32 mtu)
14e50e57
DM
274{
275}
276
6700c270
DM
/* dst_ops->redirect handler for blackhole dsts: intentionally a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
281
14e50e57
DM
282static struct dst_ops ip6_dst_blackhole_ops = {
283 .family = AF_INET6,
14e50e57
DM
284 .destroy = ip6_dst_destroy,
285 .check = ip6_dst_check,
ebb762f2 286 .mtu = ip6_blackhole_mtu,
214f45c9 287 .default_advmss = ip6_default_advmss,
14e50e57 288 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 289 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 290 .cow_metrics = dst_cow_metrics_generic,
d3aaeb38 291 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
292};
293
62fa8a84 294static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 295 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
296};
297
fb0af4c7 298static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
299 .dst = {
300 .__refcnt = ATOMIC_INIT(1),
301 .__use = 1,
2c20cbd7 302 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 303 .error = -ENETUNREACH,
d8d1f30b
CG
304 .input = ip6_pkt_discard,
305 .output = ip6_pkt_discard_out,
1da177e4
LT
306 },
307 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 308 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
309 .rt6i_metric = ~(u32) 0,
310 .rt6i_ref = ATOMIC_INIT(1),
311};
312
101367c2
TG
313#ifdef CONFIG_IPV6_MULTIPLE_TABLES
314
fb0af4c7 315static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
316 .dst = {
317 .__refcnt = ATOMIC_INIT(1),
318 .__use = 1,
2c20cbd7 319 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 320 .error = -EACCES,
d8d1f30b
CG
321 .input = ip6_pkt_prohibit,
322 .output = ip6_pkt_prohibit_out,
101367c2
TG
323 },
324 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 325 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
326 .rt6i_metric = ~(u32) 0,
327 .rt6i_ref = ATOMIC_INIT(1),
328};
329
fb0af4c7 330static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
331 .dst = {
332 .__refcnt = ATOMIC_INIT(1),
333 .__use = 1,
2c20cbd7 334 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 335 .error = -EINVAL,
d8d1f30b 336 .input = dst_discard,
ede2059d 337 .output = dst_discard_out,
101367c2
TG
338 },
339 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 340 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
341 .rt6i_metric = ~(u32) 0,
342 .rt6i_ref = ATOMIC_INIT(1),
343};
344
345#endif
346
ebfa45f0
MKL
347static void rt6_info_init(struct rt6_info *rt)
348{
349 struct dst_entry *dst = &rt->dst;
350
351 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
352 INIT_LIST_HEAD(&rt->rt6i_siblings);
353 INIT_LIST_HEAD(&rt->rt6i_uncached);
354}
355
1da177e4 356/* allocate dst with ip6_dst_ops */
d52d3997
MKL
357static struct rt6_info *__ip6_dst_alloc(struct net *net,
358 struct net_device *dev,
ad706862 359 int flags)
1da177e4 360{
97bab73f 361 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 362 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 363
81eb8447 364 if (rt) {
ebfa45f0 365 rt6_info_init(rt);
81eb8447
WW
366 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
367 }
8104891b 368
cf911662 369 return rt;
1da177e4
LT
370}
371
9ab179d8
DA
372struct rt6_info *ip6_dst_alloc(struct net *net,
373 struct net_device *dev,
374 int flags)
d52d3997 375{
ad706862 376 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
377
378 if (rt) {
379 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
bfd8e5a4 380 if (!rt->rt6i_pcpu) {
587fea74 381 dst_release_immediate(&rt->dst);
d52d3997
MKL
382 return NULL;
383 }
384 }
385
386 return rt;
387}
9ab179d8 388EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 389
1da177e4
LT
390static void ip6_dst_destroy(struct dst_entry *dst)
391{
392 struct rt6_info *rt = (struct rt6_info *)dst;
35732d01 393 struct rt6_exception_bucket *bucket;
3a2232e9 394 struct rt6_info *from = rt->from;
8d0b94af 395 struct inet6_dev *idev;
1da177e4 396
4b32b5ad 397 dst_destroy_metrics_generic(dst);
87775312 398 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
399 rt6_uncached_list_del(rt);
400
401 idev = rt->rt6i_idev;
38308473 402 if (idev) {
1da177e4
LT
403 rt->rt6i_idev = NULL;
404 in6_dev_put(idev);
1ab1457c 405 }
35732d01
WW
406 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
407 if (bucket) {
408 rt->rt6i_exception_bucket = NULL;
409 kfree(bucket);
410 }
1716a961 411
3a2232e9
DM
412 rt->from = NULL;
413 dst_release(&from->dst);
b3419363
DM
414}
415
1da177e4
LT
416static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
417 int how)
418{
419 struct rt6_info *rt = (struct rt6_info *)dst;
420 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 421 struct net_device *loopback_dev =
c346dca1 422 dev_net(dev)->loopback_dev;
1da177e4 423
e5645f51
WW
424 if (idev && idev->dev != loopback_dev) {
425 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
426 if (loopback_idev) {
427 rt->rt6i_idev = loopback_idev;
428 in6_dev_put(idev);
97cac082 429 }
1da177e4
LT
430 }
431}
432
5973fb1e
MKL
433static bool __rt6_check_expired(const struct rt6_info *rt)
434{
435 if (rt->rt6i_flags & RTF_EXPIRES)
436 return time_after(jiffies, rt->dst.expires);
437 else
438 return false;
439}
440
a50feda5 441static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 442{
1716a961
G
443 if (rt->rt6i_flags & RTF_EXPIRES) {
444 if (time_after(jiffies, rt->dst.expires))
a50feda5 445 return true;
3a2232e9 446 } else if (rt->from) {
1e2ea8ad 447 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
3a2232e9 448 rt6_check_expired(rt->from);
1716a961 449 }
a50feda5 450 return false;
1da177e4
LT
451}
452
51ebd318 453static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
52bd4c0c
ND
454 struct flowi6 *fl6, int oif,
455 int strict)
51ebd318
ND
456{
457 struct rt6_info *sibling, *next_sibling;
51ebd318 458
b673d6cc
JS
459 /* We might have already computed the hash for ICMPv6 errors. In such
460 * case it will always be non-zero. Otherwise now is the time to do it.
461 */
462 if (!fl6->mp_hash)
463 fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
464
3d709f69
IS
465 if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
466 return match;
467
468 list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings,
469 rt6i_siblings) {
470 if (fl6->mp_hash > atomic_read(&sibling->rt6i_nh_upper_bound))
471 continue;
472 if (rt6_score_route(sibling, oif, strict) < 0)
473 break;
474 match = sibling;
475 break;
476 }
477
51ebd318
ND
478 return match;
479}
480
1da177e4 481/*
66f5d6ce 482 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
483 */
484
8ed67789
DL
485static inline struct rt6_info *rt6_device_match(struct net *net,
486 struct rt6_info *rt,
b71d1d42 487 const struct in6_addr *saddr,
1da177e4 488 int oif,
d420895e 489 int flags)
1da177e4
LT
490{
491 struct rt6_info *local = NULL;
492 struct rt6_info *sprt;
493
8067bb8c
IS
494 if (!oif && ipv6_addr_any(saddr) && !(rt->rt6i_nh_flags & RTNH_F_DEAD))
495 return rt;
dd3abc4e 496
071fb37e 497 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
d1918542 498 struct net_device *dev = sprt->dst.dev;
dd3abc4e 499
8067bb8c
IS
500 if (sprt->rt6i_nh_flags & RTNH_F_DEAD)
501 continue;
502
dd3abc4e 503 if (oif) {
1da177e4
LT
504 if (dev->ifindex == oif)
505 return sprt;
506 if (dev->flags & IFF_LOOPBACK) {
38308473 507 if (!sprt->rt6i_idev ||
1da177e4 508 sprt->rt6i_idev->dev->ifindex != oif) {
17fb0b2b 509 if (flags & RT6_LOOKUP_F_IFACE)
1da177e4 510 continue;
17fb0b2b
DA
511 if (local &&
512 local->rt6i_idev->dev->ifindex == oif)
1da177e4
LT
513 continue;
514 }
515 local = sprt;
516 }
dd3abc4e
YH
517 } else {
518 if (ipv6_chk_addr(net, saddr, dev,
519 flags & RT6_LOOKUP_F_IFACE))
520 return sprt;
1da177e4 521 }
dd3abc4e 522 }
1da177e4 523
dd3abc4e 524 if (oif) {
1da177e4
LT
525 if (local)
526 return local;
527
d420895e 528 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 529 return net->ipv6.ip6_null_entry;
1da177e4 530 }
8067bb8c
IS
531
532 return rt->rt6i_nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt;
1da177e4
LT
533}
534
27097255 535#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
536struct __rt6_probe_work {
537 struct work_struct work;
538 struct in6_addr target;
539 struct net_device *dev;
540};
541
542static void rt6_probe_deferred(struct work_struct *w)
543{
544 struct in6_addr mcaddr;
545 struct __rt6_probe_work *work =
546 container_of(w, struct __rt6_probe_work, work);
547
548 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 549 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 550 dev_put(work->dev);
662f5533 551 kfree(work);
c2f17e82
HFS
552}
553
27097255
YH
554static void rt6_probe(struct rt6_info *rt)
555{
990edb42 556 struct __rt6_probe_work *work;
f2c31e32 557 struct neighbour *neigh;
27097255
YH
558 /*
559 * Okay, this does not seem to be appropriate
560 * for now, however, we need to check if it
561 * is really so; aka Router Reachability Probing.
562 *
563 * Router Reachability Probe MUST be rate-limited
564 * to no more than one per minute.
565 */
2152caea 566 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 567 return;
2152caea
YH
568 rcu_read_lock_bh();
569 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
570 if (neigh) {
8d6c31bf
MKL
571 if (neigh->nud_state & NUD_VALID)
572 goto out;
573
990edb42 574 work = NULL;
2152caea 575 write_lock(&neigh->lock);
990edb42
MKL
576 if (!(neigh->nud_state & NUD_VALID) &&
577 time_after(jiffies,
578 neigh->updated +
579 rt->rt6i_idev->cnf.rtr_probe_interval)) {
580 work = kmalloc(sizeof(*work), GFP_ATOMIC);
581 if (work)
582 __neigh_set_probe_once(neigh);
c2f17e82 583 }
2152caea 584 write_unlock(&neigh->lock);
990edb42
MKL
585 } else {
586 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 587 }
990edb42
MKL
588
589 if (work) {
590 INIT_WORK(&work->work, rt6_probe_deferred);
591 work->target = rt->rt6i_gateway;
592 dev_hold(rt->dst.dev);
593 work->dev = rt->dst.dev;
594 schedule_work(&work->work);
595 }
596
8d6c31bf 597out:
2152caea 598 rcu_read_unlock_bh();
27097255
YH
599}
600#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* nothing to do */
}
604#endif
605
1da177e4 606/*
554cfb7e 607 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 608 */
b6f99a21 609static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 610{
d1918542 611 struct net_device *dev = rt->dst.dev;
161980f4 612 if (!oif || dev->ifindex == oif)
554cfb7e 613 return 2;
161980f4
DM
614 if ((dev->flags & IFF_LOOPBACK) &&
615 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
616 return 1;
617 return 0;
554cfb7e 618}
1da177e4 619
afc154e9 620static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 621{
f2c31e32 622 struct neighbour *neigh;
afc154e9 623 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 624
4d0c5911
YH
625 if (rt->rt6i_flags & RTF_NONEXTHOP ||
626 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 627 return RT6_NUD_SUCCEED;
145a3621
YH
628
629 rcu_read_lock_bh();
630 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
631 if (neigh) {
632 read_lock(&neigh->lock);
554cfb7e 633 if (neigh->nud_state & NUD_VALID)
afc154e9 634 ret = RT6_NUD_SUCCEED;
398bcbeb 635#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 636 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 637 ret = RT6_NUD_SUCCEED;
7e980569
JB
638 else
639 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 640#endif
145a3621 641 read_unlock(&neigh->lock);
afc154e9
HFS
642 } else {
643 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 644 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 645 }
145a3621
YH
646 rcu_read_unlock_bh();
647
a5a81f0b 648 return ret;
1da177e4
LT
649}
650
554cfb7e
YH
651static int rt6_score_route(struct rt6_info *rt, int oif,
652 int strict)
1da177e4 653{
a5a81f0b 654 int m;
1ab1457c 655
4d0c5911 656 m = rt6_check_dev(rt, oif);
77d16f45 657 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 658 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
659#ifdef CONFIG_IPV6_ROUTER_PREF
660 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
661#endif
afc154e9
HFS
662 if (strict & RT6_LOOKUP_F_REACHABLE) {
663 int n = rt6_check_neigh(rt);
664 if (n < 0)
665 return n;
666 }
554cfb7e
YH
667 return m;
668}
669
f11e6659 670static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
671 int *mpri, struct rt6_info *match,
672 bool *do_rr)
554cfb7e 673{
f11e6659 674 int m;
afc154e9 675 bool match_do_rr = false;
35103d11 676 struct inet6_dev *idev = rt->rt6i_idev;
35103d11 677
8067bb8c
IS
678 if (rt->rt6i_nh_flags & RTNH_F_DEAD)
679 goto out;
680
14c5206c
IS
681 if (idev->cnf.ignore_routes_with_linkdown &&
682 rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 683 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 684 goto out;
f11e6659
DM
685
686 if (rt6_check_expired(rt))
687 goto out;
688
689 m = rt6_score_route(rt, oif, strict);
7e980569 690 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
691 match_do_rr = true;
692 m = 0; /* lowest valid score */
7e980569 693 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 694 goto out;
afc154e9
HFS
695 }
696
697 if (strict & RT6_LOOKUP_F_REACHABLE)
698 rt6_probe(rt);
f11e6659 699
7e980569 700 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 701 if (m > *mpri) {
afc154e9 702 *do_rr = match_do_rr;
f11e6659
DM
703 *mpri = m;
704 match = rt;
f11e6659 705 }
f11e6659
DM
706out:
707 return match;
708}
709
710static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
8d1040e8 711 struct rt6_info *leaf,
f11e6659 712 struct rt6_info *rr_head,
afc154e9
HFS
713 u32 metric, int oif, int strict,
714 bool *do_rr)
f11e6659 715{
9fbdcfaf 716 struct rt6_info *rt, *match, *cont;
554cfb7e 717 int mpri = -1;
1da177e4 718
f11e6659 719 match = NULL;
9fbdcfaf 720 cont = NULL;
071fb37e 721 for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
9fbdcfaf
SK
722 if (rt->rt6i_metric != metric) {
723 cont = rt;
724 break;
725 }
726
727 match = find_match(rt, oif, strict, &mpri, match, do_rr);
728 }
729
66f5d6ce 730 for (rt = leaf; rt && rt != rr_head;
071fb37e 731 rt = rcu_dereference(rt->rt6_next)) {
9fbdcfaf
SK
732 if (rt->rt6i_metric != metric) {
733 cont = rt;
734 break;
735 }
736
afc154e9 737 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
738 }
739
740 if (match || !cont)
741 return match;
742
071fb37e 743 for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))
afc154e9 744 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 745
f11e6659
DM
746 return match;
747}
1da177e4 748
8d1040e8
WW
749static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
750 int oif, int strict)
f11e6659 751{
66f5d6ce 752 struct rt6_info *leaf = rcu_dereference(fn->leaf);
f11e6659 753 struct rt6_info *match, *rt0;
afc154e9 754 bool do_rr = false;
17ecf590 755 int key_plen;
1da177e4 756
87b1af8d 757 if (!leaf || leaf == net->ipv6.ip6_null_entry)
8d1040e8
WW
758 return net->ipv6.ip6_null_entry;
759
66f5d6ce 760 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 761 if (!rt0)
66f5d6ce 762 rt0 = leaf;
1da177e4 763
17ecf590
WW
764 /* Double check to make sure fn is not an intermediate node
765 * and fn->leaf does not points to its child's leaf
766 * (This might happen if all routes under fn are deleted from
767 * the tree and fib6_repair_tree() is called on the node.)
768 */
769 key_plen = rt0->rt6i_dst.plen;
770#ifdef CONFIG_IPV6_SUBTREES
771 if (rt0->rt6i_src.plen)
772 key_plen = rt0->rt6i_src.plen;
773#endif
774 if (fn->fn_bit != key_plen)
775 return net->ipv6.ip6_null_entry;
776
8d1040e8 777 match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
afc154e9 778 &do_rr);
1da177e4 779
afc154e9 780 if (do_rr) {
071fb37e 781 struct rt6_info *next = rcu_dereference(rt0->rt6_next);
f11e6659 782
554cfb7e 783 /* no entries matched; do round-robin */
f11e6659 784 if (!next || next->rt6i_metric != rt0->rt6i_metric)
8d1040e8 785 next = leaf;
f11e6659 786
66f5d6ce
WW
787 if (next != rt0) {
788 spin_lock_bh(&leaf->rt6i_table->tb6_lock);
789 /* make sure next is not being deleted from the tree */
790 if (next->rt6i_node)
791 rcu_assign_pointer(fn->rr_ptr, next);
792 spin_unlock_bh(&leaf->rt6i_table->tb6_lock);
793 }
1da177e4 794 }
1da177e4 795
a02cec21 796 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
797}
798
8b9df265
MKL
799static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
800{
801 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
802}
803
70ceb4f5
YH
804#ifdef CONFIG_IPV6_ROUTE_INFO
805int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 806 const struct in6_addr *gwaddr)
70ceb4f5 807{
c346dca1 808 struct net *net = dev_net(dev);
70ceb4f5
YH
809 struct route_info *rinfo = (struct route_info *) opt;
810 struct in6_addr prefix_buf, *prefix;
811 unsigned int pref;
4bed72e4 812 unsigned long lifetime;
70ceb4f5
YH
813 struct rt6_info *rt;
814
815 if (len < sizeof(struct route_info)) {
816 return -EINVAL;
817 }
818
819 /* Sanity check for prefix_len and length */
820 if (rinfo->length > 3) {
821 return -EINVAL;
822 } else if (rinfo->prefix_len > 128) {
823 return -EINVAL;
824 } else if (rinfo->prefix_len > 64) {
825 if (rinfo->length < 2) {
826 return -EINVAL;
827 }
828 } else if (rinfo->prefix_len > 0) {
829 if (rinfo->length < 1) {
830 return -EINVAL;
831 }
832 }
833
834 pref = rinfo->route_pref;
835 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 836 return -EINVAL;
70ceb4f5 837
4bed72e4 838 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
839
840 if (rinfo->length == 3)
841 prefix = (struct in6_addr *)rinfo->prefix;
842 else {
843 /* this function is safe */
844 ipv6_addr_prefix(&prefix_buf,
845 (struct in6_addr *)rinfo->prefix,
846 rinfo->prefix_len);
847 prefix = &prefix_buf;
848 }
849
f104a567
DJ
850 if (rinfo->prefix_len == 0)
851 rt = rt6_get_dflt_router(gwaddr, dev);
852 else
853 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 854 gwaddr, dev);
70ceb4f5
YH
855
856 if (rt && !lifetime) {
e0a1ad73 857 ip6_del_rt(rt);
70ceb4f5
YH
858 rt = NULL;
859 }
860
861 if (!rt && lifetime)
830218c1
DA
862 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
863 dev, pref);
70ceb4f5
YH
864 else if (rt)
865 rt->rt6i_flags = RTF_ROUTEINFO |
866 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
867
868 if (rt) {
1716a961
G
869 if (!addrconf_finite_timeout(lifetime))
870 rt6_clean_expires(rt);
871 else
872 rt6_set_expires(rt, jiffies + HZ * lifetime);
873
94e187c0 874 ip6_rt_put(rt);
70ceb4f5
YH
875 }
876 return 0;
877}
878#endif
879
a3c00e46
MKL
880static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
881 struct in6_addr *saddr)
882{
66f5d6ce 883 struct fib6_node *pn, *sn;
a3c00e46
MKL
884 while (1) {
885 if (fn->fn_flags & RTN_TL_ROOT)
886 return NULL;
66f5d6ce
WW
887 pn = rcu_dereference(fn->parent);
888 sn = FIB6_SUBTREE(pn);
889 if (sn && sn != fn)
890 fn = fib6_lookup(sn, NULL, saddr);
a3c00e46
MKL
891 else
892 fn = pn;
893 if (fn->fn_flags & RTN_RTINFO)
894 return fn;
895 }
896}
c71099ac 897
d3843fe5
WW
898static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
899 bool null_fallback)
900{
901 struct rt6_info *rt = *prt;
902
903 if (dst_hold_safe(&rt->dst))
904 return true;
905 if (null_fallback) {
906 rt = net->ipv6.ip6_null_entry;
907 dst_hold(&rt->dst);
908 } else {
909 rt = NULL;
910 }
911 *prt = rt;
912 return false;
913}
914
8ed67789
DL
915static struct rt6_info *ip6_pol_route_lookup(struct net *net,
916 struct fib6_table *table,
4c9483b2 917 struct flowi6 *fl6, int flags)
1da177e4 918{
2b760fcf 919 struct rt6_info *rt, *rt_cache;
1da177e4 920 struct fib6_node *fn;
1da177e4 921
66f5d6ce 922 rcu_read_lock();
4c9483b2 923 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 924restart:
66f5d6ce
WW
925 rt = rcu_dereference(fn->leaf);
926 if (!rt) {
927 rt = net->ipv6.ip6_null_entry;
928 } else {
929 rt = rt6_device_match(net, rt, &fl6->saddr,
930 fl6->flowi6_oif, flags);
931 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
932 rt = rt6_multipath_select(rt, fl6,
933 fl6->flowi6_oif, flags);
934 }
a3c00e46
MKL
935 if (rt == net->ipv6.ip6_null_entry) {
936 fn = fib6_backtrack(fn, &fl6->saddr);
937 if (fn)
938 goto restart;
939 }
2b760fcf
WW
940 /* Search through exception table */
941 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
942 if (rt_cache)
943 rt = rt_cache;
944
d3843fe5
WW
945 if (ip6_hold_safe(net, &rt, true))
946 dst_use_noref(&rt->dst, jiffies);
947
66f5d6ce 948 rcu_read_unlock();
b811580d 949
b65f164d 950 trace_fib6_table_lookup(net, rt, table, fl6);
b811580d 951
c71099ac
TG
952 return rt;
953
954}
955
67ba4152 956struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
ea6e574e
FW
957 int flags)
958{
959 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
960}
961EXPORT_SYMBOL_GPL(ip6_route_lookup);
962
9acd9f3a
YH
963struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
964 const struct in6_addr *saddr, int oif, int strict)
c71099ac 965{
4c9483b2
DM
966 struct flowi6 fl6 = {
967 .flowi6_oif = oif,
968 .daddr = *daddr,
c71099ac
TG
969 };
970 struct dst_entry *dst;
77d16f45 971 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 972
adaa70bb 973 if (saddr) {
4c9483b2 974 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
975 flags |= RT6_LOOKUP_F_HAS_SADDR;
976 }
977
4c9483b2 978 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
979 if (dst->error == 0)
980 return (struct rt6_info *) dst;
981
982 dst_release(dst);
983
1da177e4
LT
984 return NULL;
985}
7159039a
YH
986EXPORT_SYMBOL(rt6_lookup);
987
c71099ac 988/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
989 * It takes new route entry, the addition fails by any reason the
990 * route is released.
991 * Caller must hold dst before calling it.
1da177e4
LT
992 */
993
e5fd387a 994static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
333c4301
DA
995 struct mx6_config *mxc,
996 struct netlink_ext_ack *extack)
1da177e4
LT
997{
998 int err;
c71099ac 999 struct fib6_table *table;
1da177e4 1000
c71099ac 1001 table = rt->rt6i_table;
66f5d6ce 1002 spin_lock_bh(&table->tb6_lock);
333c4301 1003 err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
66f5d6ce 1004 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1005
1006 return err;
1007}
1008
40e22e8f
TG
1009int ip6_ins_rt(struct rt6_info *rt)
1010{
e715b6d3
FW
1011 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
1012 struct mx6_config mxc = { .mx = NULL, };
1013
1cfb71ee
WW
1014 /* Hold dst to account for the reference from the fib6 tree */
1015 dst_hold(&rt->dst);
333c4301 1016 return __ip6_ins_rt(rt, &info, &mxc, NULL);
40e22e8f
TG
1017}
1018
4832c30d
DA
1019/* called with rcu_lock held */
1020static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
1021{
1022 struct net_device *dev = rt->dst.dev;
1023
98d11291 1024 if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
4832c30d
DA
1025 /* for copies of local routes, dst->dev needs to be the
1026 * device if it is a master device, the master device if
1027 * device is enslaved, and the loopback as the default
1028 */
1029 if (netif_is_l3_slave(dev) &&
1030 !rt6_need_strict(&rt->rt6i_dst.addr))
1031 dev = l3mdev_master_dev_rcu(dev);
1032 else if (!netif_is_l3_master(dev))
1033 dev = dev_net(dev)->loopback_dev;
1034 /* last case is netif_is_l3_master(dev) is true in which
1035 * case we want dev returned to be dev
1036 */
1037 }
1038
1039 return dev;
1040}
1041
8b9df265
MKL
1042static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
1043 const struct in6_addr *daddr,
1044 const struct in6_addr *saddr)
1da177e4 1045{
4832c30d 1046 struct net_device *dev;
1da177e4
LT
1047 struct rt6_info *rt;
1048
1049 /*
1050 * Clone the route.
1051 */
1052
d52d3997 1053 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
3a2232e9 1054 ort = ort->from;
1da177e4 1055
4832c30d
DA
1056 rcu_read_lock();
1057 dev = ip6_rt_get_dev_rcu(ort);
1058 rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
1059 rcu_read_unlock();
83a09abd
MKL
1060 if (!rt)
1061 return NULL;
1062
1063 ip6_rt_copy_init(rt, ort);
1064 rt->rt6i_flags |= RTF_CACHE;
1065 rt->rt6i_metric = 0;
1066 rt->dst.flags |= DST_HOST;
1067 rt->rt6i_dst.addr = *daddr;
1068 rt->rt6i_dst.plen = 128;
1da177e4 1069
83a09abd
MKL
1070 if (!rt6_is_gw_or_nonexthop(ort)) {
1071 if (ort->rt6i_dst.plen != 128 &&
1072 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
1073 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1074#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1075 if (rt->rt6i_src.plen && saddr) {
1076 rt->rt6i_src.addr = *saddr;
1077 rt->rt6i_src.plen = 128;
8b9df265 1078 }
83a09abd 1079#endif
95a9a5ba 1080 }
1da177e4 1081
95a9a5ba
YH
1082 return rt;
1083}
1da177e4 1084
d52d3997
MKL
1085static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1086{
4832c30d 1087 struct net_device *dev;
d52d3997
MKL
1088 struct rt6_info *pcpu_rt;
1089
4832c30d
DA
1090 rcu_read_lock();
1091 dev = ip6_rt_get_dev_rcu(rt);
1092 pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
1093 rcu_read_unlock();
d52d3997
MKL
1094 if (!pcpu_rt)
1095 return NULL;
1096 ip6_rt_copy_init(pcpu_rt, rt);
1097 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1098 pcpu_rt->rt6i_flags |= RTF_PCPU;
1099 return pcpu_rt;
1100}
1101
66f5d6ce 1102/* It should be called with rcu_read_lock() acquired */
d52d3997
MKL
1103static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1104{
a73e4195 1105 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1106
1107 p = this_cpu_ptr(rt->rt6i_pcpu);
1108 pcpu_rt = *p;
1109
d3843fe5 1110 if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false))
a73e4195 1111 rt6_dst_from_metrics_check(pcpu_rt);
d3843fe5 1112
a73e4195
MKL
1113 return pcpu_rt;
1114}
1115
1116static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1117{
1118 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1119
1120 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1121 if (!pcpu_rt) {
1122 struct net *net = dev_net(rt->dst.dev);
1123
9c7370a1
MKL
1124 dst_hold(&net->ipv6.ip6_null_entry->dst);
1125 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1126 }
1127
a94b9367
WW
1128 dst_hold(&pcpu_rt->dst);
1129 p = this_cpu_ptr(rt->rt6i_pcpu);
1130 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1131 BUG_ON(prev);
a94b9367 1132
d52d3997
MKL
1133 rt6_dst_from_metrics_check(pcpu_rt);
1134 return pcpu_rt;
1135}
1136
35732d01
WW
1137/* exception hash table implementation
1138 */
1139static DEFINE_SPINLOCK(rt6_exception_lock);
1140
1141/* Remove rt6_ex from hash table and free the memory
1142 * Caller must hold rt6_exception_lock
1143 */
1144static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1145 struct rt6_exception *rt6_ex)
1146{
b2427e67 1147 struct net *net;
81eb8447 1148
35732d01
WW
1149 if (!bucket || !rt6_ex)
1150 return;
b2427e67
CIK
1151
1152 net = dev_net(rt6_ex->rt6i->dst.dev);
35732d01
WW
1153 rt6_ex->rt6i->rt6i_node = NULL;
1154 hlist_del_rcu(&rt6_ex->hlist);
1155 rt6_release(rt6_ex->rt6i);
1156 kfree_rcu(rt6_ex, rcu);
1157 WARN_ON_ONCE(!bucket->depth);
1158 bucket->depth--;
81eb8447 1159 net->ipv6.rt6_stats->fib_rt_cache--;
35732d01
WW
1160}
1161
1162/* Remove oldest rt6_ex in bucket and free the memory
1163 * Caller must hold rt6_exception_lock
1164 */
1165static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1166{
1167 struct rt6_exception *rt6_ex, *oldest = NULL;
1168
1169 if (!bucket)
1170 return;
1171
1172 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1173 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1174 oldest = rt6_ex;
1175 }
1176 rt6_remove_exception(bucket, oldest);
1177}
1178
1179static u32 rt6_exception_hash(const struct in6_addr *dst,
1180 const struct in6_addr *src)
1181{
1182 static u32 seed __read_mostly;
1183 u32 val;
1184
1185 net_get_random_once(&seed, sizeof(seed));
1186 val = jhash(dst, sizeof(*dst), seed);
1187
1188#ifdef CONFIG_IPV6_SUBTREES
1189 if (src)
1190 val = jhash(src, sizeof(*src), val);
1191#endif
1192 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1193}
1194
1195/* Helper function to find the cached rt in the hash table
1196 * and update bucket pointer to point to the bucket for this
1197 * (daddr, saddr) pair
1198 * Caller must hold rt6_exception_lock
1199 */
1200static struct rt6_exception *
1201__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1202 const struct in6_addr *daddr,
1203 const struct in6_addr *saddr)
1204{
1205 struct rt6_exception *rt6_ex;
1206 u32 hval;
1207
1208 if (!(*bucket) || !daddr)
1209 return NULL;
1210
1211 hval = rt6_exception_hash(daddr, saddr);
1212 *bucket += hval;
1213
1214 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1215 struct rt6_info *rt6 = rt6_ex->rt6i;
1216 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1217
1218#ifdef CONFIG_IPV6_SUBTREES
1219 if (matched && saddr)
1220 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1221#endif
1222 if (matched)
1223 return rt6_ex;
1224 }
1225 return NULL;
1226}
1227
1228/* Helper function to find the cached rt in the hash table
1229 * and update bucket pointer to point to the bucket for this
1230 * (daddr, saddr) pair
1231 * Caller must hold rcu_read_lock()
1232 */
1233static struct rt6_exception *
1234__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1235 const struct in6_addr *daddr,
1236 const struct in6_addr *saddr)
1237{
1238 struct rt6_exception *rt6_ex;
1239 u32 hval;
1240
1241 WARN_ON_ONCE(!rcu_read_lock_held());
1242
1243 if (!(*bucket) || !daddr)
1244 return NULL;
1245
1246 hval = rt6_exception_hash(daddr, saddr);
1247 *bucket += hval;
1248
1249 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1250 struct rt6_info *rt6 = rt6_ex->rt6i;
1251 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1252
1253#ifdef CONFIG_IPV6_SUBTREES
1254 if (matched && saddr)
1255 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1256#endif
1257 if (matched)
1258 return rt6_ex;
1259 }
1260 return NULL;
1261}
1262
1263static int rt6_insert_exception(struct rt6_info *nrt,
1264 struct rt6_info *ort)
1265{
81eb8447 1266 struct net *net = dev_net(ort->dst.dev);
35732d01
WW
1267 struct rt6_exception_bucket *bucket;
1268 struct in6_addr *src_key = NULL;
1269 struct rt6_exception *rt6_ex;
1270 int err = 0;
1271
1272 /* ort can't be a cache or pcpu route */
1273 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
3a2232e9 1274 ort = ort->from;
35732d01
WW
1275 WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
1276
1277 spin_lock_bh(&rt6_exception_lock);
1278
1279 if (ort->exception_bucket_flushed) {
1280 err = -EINVAL;
1281 goto out;
1282 }
1283
1284 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1285 lockdep_is_held(&rt6_exception_lock));
1286 if (!bucket) {
1287 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1288 GFP_ATOMIC);
1289 if (!bucket) {
1290 err = -ENOMEM;
1291 goto out;
1292 }
1293 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1294 }
1295
1296#ifdef CONFIG_IPV6_SUBTREES
1297 /* rt6i_src.plen != 0 indicates ort is in subtree
1298 * and exception table is indexed by a hash of
1299 * both rt6i_dst and rt6i_src.
1300 * Otherwise, the exception table is indexed by
1301 * a hash of only rt6i_dst.
1302 */
1303 if (ort->rt6i_src.plen)
1304 src_key = &nrt->rt6i_src.addr;
1305#endif
60006a48
WW
1306
1307 /* Update rt6i_prefsrc as it could be changed
1308 * in rt6_remove_prefsrc()
1309 */
1310 nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
f5bbe7ee
WW
1311 /* rt6_mtu_change() might lower mtu on ort.
1312 * Only insert this exception route if its mtu
1313 * is less than ort's mtu value.
1314 */
1315 if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) {
1316 err = -EINVAL;
1317 goto out;
1318 }
60006a48 1319
35732d01
WW
1320 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1321 src_key);
1322 if (rt6_ex)
1323 rt6_remove_exception(bucket, rt6_ex);
1324
1325 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1326 if (!rt6_ex) {
1327 err = -ENOMEM;
1328 goto out;
1329 }
1330 rt6_ex->rt6i = nrt;
1331 rt6_ex->stamp = jiffies;
1332 atomic_inc(&nrt->rt6i_ref);
1333 nrt->rt6i_node = ort->rt6i_node;
1334 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1335 bucket->depth++;
81eb8447 1336 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1337
1338 if (bucket->depth > FIB6_MAX_DEPTH)
1339 rt6_exception_remove_oldest(bucket);
1340
1341out:
1342 spin_unlock_bh(&rt6_exception_lock);
1343
1344 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1345 if (!err) {
922c2ac8 1346 spin_lock_bh(&ort->rt6i_table->tb6_lock);
35732d01 1347 fib6_update_sernum(ort);
922c2ac8 1348 spin_unlock_bh(&ort->rt6i_table->tb6_lock);
b886d5f2
PA
1349 fib6_force_start_gc(net);
1350 }
35732d01
WW
1351
1352 return err;
1353}
1354
1355void rt6_flush_exceptions(struct rt6_info *rt)
1356{
1357 struct rt6_exception_bucket *bucket;
1358 struct rt6_exception *rt6_ex;
1359 struct hlist_node *tmp;
1360 int i;
1361
1362 spin_lock_bh(&rt6_exception_lock);
1363 /* Prevent rt6_insert_exception() to recreate the bucket list */
1364 rt->exception_bucket_flushed = 1;
1365
1366 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1367 lockdep_is_held(&rt6_exception_lock));
1368 if (!bucket)
1369 goto out;
1370
1371 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1372 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1373 rt6_remove_exception(bucket, rt6_ex);
1374 WARN_ON_ONCE(bucket->depth);
1375 bucket++;
1376 }
1377
1378out:
1379 spin_unlock_bh(&rt6_exception_lock);
1380}
1381
1382/* Find cached rt in the hash table inside passed in rt
1383 * Caller has to hold rcu_read_lock()
1384 */
1385static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
1386 struct in6_addr *daddr,
1387 struct in6_addr *saddr)
1388{
1389 struct rt6_exception_bucket *bucket;
1390 struct in6_addr *src_key = NULL;
1391 struct rt6_exception *rt6_ex;
1392 struct rt6_info *res = NULL;
1393
1394 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1395
1396#ifdef CONFIG_IPV6_SUBTREES
1397 /* rt6i_src.plen != 0 indicates rt is in subtree
1398 * and exception table is indexed by a hash of
1399 * both rt6i_dst and rt6i_src.
1400 * Otherwise, the exception table is indexed by
1401 * a hash of only rt6i_dst.
1402 */
1403 if (rt->rt6i_src.plen)
1404 src_key = saddr;
1405#endif
1406 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1407
1408 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1409 res = rt6_ex->rt6i;
1410
1411 return res;
1412}
1413
1414/* Remove the passed in cached rt from the hash table that contains it */
1415int rt6_remove_exception_rt(struct rt6_info *rt)
1416{
35732d01 1417 struct rt6_exception_bucket *bucket;
3a2232e9 1418 struct rt6_info *from = rt->from;
35732d01
WW
1419 struct in6_addr *src_key = NULL;
1420 struct rt6_exception *rt6_ex;
1421 int err;
1422
1423 if (!from ||
442d713b 1424 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1425 return -EINVAL;
1426
1427 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1428 return -ENOENT;
1429
1430 spin_lock_bh(&rt6_exception_lock);
1431 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1432 lockdep_is_held(&rt6_exception_lock));
1433#ifdef CONFIG_IPV6_SUBTREES
1434 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1435 * and exception table is indexed by a hash of
1436 * both rt6i_dst and rt6i_src.
1437 * Otherwise, the exception table is indexed by
1438 * a hash of only rt6i_dst.
1439 */
1440 if (from->rt6i_src.plen)
1441 src_key = &rt->rt6i_src.addr;
1442#endif
1443 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1444 &rt->rt6i_dst.addr,
1445 src_key);
1446 if (rt6_ex) {
1447 rt6_remove_exception(bucket, rt6_ex);
1448 err = 0;
1449 } else {
1450 err = -ENOENT;
1451 }
1452
1453 spin_unlock_bh(&rt6_exception_lock);
1454 return err;
1455}
1456
1457/* Find rt6_ex which contains the passed in rt cache and
1458 * refresh its stamp
1459 */
1460static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1461{
35732d01 1462 struct rt6_exception_bucket *bucket;
3a2232e9 1463 struct rt6_info *from = rt->from;
35732d01
WW
1464 struct in6_addr *src_key = NULL;
1465 struct rt6_exception *rt6_ex;
1466
1467 if (!from ||
442d713b 1468 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1469 return;
1470
1471 rcu_read_lock();
1472 bucket = rcu_dereference(from->rt6i_exception_bucket);
1473
1474#ifdef CONFIG_IPV6_SUBTREES
1475 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1476 * and exception table is indexed by a hash of
1477 * both rt6i_dst and rt6i_src.
1478 * Otherwise, the exception table is indexed by
1479 * a hash of only rt6i_dst.
1480 */
1481 if (from->rt6i_src.plen)
1482 src_key = &rt->rt6i_src.addr;
1483#endif
1484 rt6_ex = __rt6_find_exception_rcu(&bucket,
1485 &rt->rt6i_dst.addr,
1486 src_key);
1487 if (rt6_ex)
1488 rt6_ex->stamp = jiffies;
1489
1490 rcu_read_unlock();
1491}
1492
60006a48
WW
1493static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
1494{
1495 struct rt6_exception_bucket *bucket;
1496 struct rt6_exception *rt6_ex;
1497 int i;
1498
1499 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1500 lockdep_is_held(&rt6_exception_lock));
1501
1502 if (bucket) {
1503 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1504 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1505 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1506 }
1507 bucket++;
1508 }
1509 }
1510}
1511
f5bbe7ee
WW
1512static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu)
1513{
1514 struct rt6_exception_bucket *bucket;
1515 struct rt6_exception *rt6_ex;
1516 int i;
1517
1518 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1519 lockdep_is_held(&rt6_exception_lock));
1520
1521 if (bucket) {
1522 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1523 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1524 struct rt6_info *entry = rt6_ex->rt6i;
1525 /* For RTF_CACHE with rt6i_pmtu == 0
1526 * (i.e. a redirected route),
1527 * the metrics of its rt->dst.from has already
1528 * been updated.
1529 */
1530 if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu)
1531 entry->rt6i_pmtu = mtu;
1532 }
1533 bucket++;
1534 }
1535 }
1536}
1537
b16cb459
WW
1538#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1539
1540static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
1541 struct in6_addr *gateway)
1542{
1543 struct rt6_exception_bucket *bucket;
1544 struct rt6_exception *rt6_ex;
1545 struct hlist_node *tmp;
1546 int i;
1547
1548 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1549 return;
1550
1551 spin_lock_bh(&rt6_exception_lock);
1552 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1553 lockdep_is_held(&rt6_exception_lock));
1554
1555 if (bucket) {
1556 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1557 hlist_for_each_entry_safe(rt6_ex, tmp,
1558 &bucket->chain, hlist) {
1559 struct rt6_info *entry = rt6_ex->rt6i;
1560
1561 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1562 RTF_CACHE_GATEWAY &&
1563 ipv6_addr_equal(gateway,
1564 &entry->rt6i_gateway)) {
1565 rt6_remove_exception(bucket, rt6_ex);
1566 }
1567 }
1568 bucket++;
1569 }
1570 }
1571
1572 spin_unlock_bh(&rt6_exception_lock);
1573}
1574
c757faa8
WW
1575static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1576 struct rt6_exception *rt6_ex,
1577 struct fib6_gc_args *gc_args,
1578 unsigned long now)
1579{
1580 struct rt6_info *rt = rt6_ex->rt6i;
1581
1859bac0
PA
1582 /* we are pruning and obsoleting aged-out and non gateway exceptions
1583 * even if others have still references to them, so that on next
1584 * dst_check() such references can be dropped.
1585 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1586 * expired, independently from their aging, as per RFC 8201 section 4
1587 */
31afeb42
WW
1588 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1589 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1590 RT6_TRACE("aging clone %p\n", rt);
1591 rt6_remove_exception(bucket, rt6_ex);
1592 return;
1593 }
1594 } else if (time_after(jiffies, rt->dst.expires)) {
1595 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1596 rt6_remove_exception(bucket, rt6_ex);
1597 return;
31afeb42
WW
1598 }
1599
1600 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1601 struct neighbour *neigh;
1602 __u8 neigh_flags = 0;
1603
1604 neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
1605 if (neigh) {
1606 neigh_flags = neigh->flags;
1607 neigh_release(neigh);
1608 }
1609 if (!(neigh_flags & NTF_ROUTER)) {
1610 RT6_TRACE("purging route %p via non-router but gateway\n",
1611 rt);
1612 rt6_remove_exception(bucket, rt6_ex);
1613 return;
1614 }
1615 }
31afeb42 1616
c757faa8
WW
1617 gc_args->more++;
1618}
1619
1620void rt6_age_exceptions(struct rt6_info *rt,
1621 struct fib6_gc_args *gc_args,
1622 unsigned long now)
1623{
1624 struct rt6_exception_bucket *bucket;
1625 struct rt6_exception *rt6_ex;
1626 struct hlist_node *tmp;
1627 int i;
1628
1629 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1630 return;
1631
1632 spin_lock_bh(&rt6_exception_lock);
1633 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1634 lockdep_is_held(&rt6_exception_lock));
1635
1636 if (bucket) {
1637 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1638 hlist_for_each_entry_safe(rt6_ex, tmp,
1639 &bucket->chain, hlist) {
1640 rt6_age_examine_exception(bucket, rt6_ex,
1641 gc_args, now);
1642 }
1643 bucket++;
1644 }
1645 }
1646 spin_unlock_bh(&rt6_exception_lock);
1647}
1648
9ff74384
DA
1649struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1650 int oif, struct flowi6 *fl6, int flags)
1da177e4 1651{
367efcb9 1652 struct fib6_node *fn, *saved_fn;
2b760fcf 1653 struct rt6_info *rt, *rt_cache;
c71099ac 1654 int strict = 0;
1da177e4 1655
77d16f45 1656 strict |= flags & RT6_LOOKUP_F_IFACE;
d5d32e4b 1657 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
367efcb9
MKL
1658 if (net->ipv6.devconf_all->forwarding == 0)
1659 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1660
66f5d6ce 1661 rcu_read_lock();
1da177e4 1662
4c9483b2 1663 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1664 saved_fn = fn;
1da177e4 1665
ca254490
DA
1666 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1667 oif = 0;
1668
a3c00e46 1669redo_rt6_select:
8d1040e8 1670 rt = rt6_select(net, fn, oif, strict);
52bd4c0c 1671 if (rt->rt6i_nsiblings)
367efcb9 1672 rt = rt6_multipath_select(rt, fl6, oif, strict);
a3c00e46
MKL
1673 if (rt == net->ipv6.ip6_null_entry) {
1674 fn = fib6_backtrack(fn, &fl6->saddr);
1675 if (fn)
1676 goto redo_rt6_select;
367efcb9
MKL
1677 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1678 /* also consider unreachable route */
1679 strict &= ~RT6_LOOKUP_F_REACHABLE;
1680 fn = saved_fn;
1681 goto redo_rt6_select;
367efcb9 1682 }
a3c00e46
MKL
1683 }
1684
2b760fcf
WW
1685 /*Search through exception table */
1686 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
1687 if (rt_cache)
1688 rt = rt_cache;
fb9de91e 1689
d3843fe5 1690 if (rt == net->ipv6.ip6_null_entry) {
66f5d6ce 1691 rcu_read_unlock();
d3843fe5 1692 dst_hold(&rt->dst);
b65f164d 1693 trace_fib6_table_lookup(net, rt, table, fl6);
d3843fe5
WW
1694 return rt;
1695 } else if (rt->rt6i_flags & RTF_CACHE) {
1696 if (ip6_hold_safe(net, &rt, true)) {
1697 dst_use_noref(&rt->dst, jiffies);
1698 rt6_dst_from_metrics_check(rt);
1699 }
66f5d6ce 1700 rcu_read_unlock();
b65f164d 1701 trace_fib6_table_lookup(net, rt, table, fl6);
d52d3997 1702 return rt;
3da59bd9
MKL
1703 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1704 !(rt->rt6i_flags & RTF_GATEWAY))) {
1705 /* Create a RTF_CACHE clone which will not be
1706 * owned by the fib6 tree. It is for the special case where
1707 * the daddr in the skb during the neighbor look-up is different
1708 * from the fl6->daddr used to look-up route here.
1709 */
1710
1711 struct rt6_info *uncached_rt;
1712
d3843fe5
WW
1713 if (ip6_hold_safe(net, &rt, true)) {
1714 dst_use_noref(&rt->dst, jiffies);
1715 } else {
66f5d6ce 1716 rcu_read_unlock();
d3843fe5
WW
1717 uncached_rt = rt;
1718 goto uncached_rt_out;
1719 }
66f5d6ce 1720 rcu_read_unlock();
d52d3997 1721
3da59bd9
MKL
1722 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1723 dst_release(&rt->dst);
c71099ac 1724
1cfb71ee
WW
1725 if (uncached_rt) {
1726 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1727 * No need for another dst_hold()
1728 */
8d0b94af 1729 rt6_uncached_list_add(uncached_rt);
81eb8447 1730 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1731 } else {
3da59bd9 1732 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1733 dst_hold(&uncached_rt->dst);
1734 }
b811580d 1735
d3843fe5 1736uncached_rt_out:
b65f164d 1737 trace_fib6_table_lookup(net, uncached_rt, table, fl6);
3da59bd9 1738 return uncached_rt;
3da59bd9 1739
d52d3997
MKL
1740 } else {
1741 /* Get a percpu copy */
1742
1743 struct rt6_info *pcpu_rt;
1744
d3843fe5 1745 dst_use_noref(&rt->dst, jiffies);
951f788a 1746 local_bh_disable();
d52d3997 1747 pcpu_rt = rt6_get_pcpu_route(rt);
d52d3997 1748
951f788a 1749 if (!pcpu_rt) {
a94b9367
WW
1750 /* atomic_inc_not_zero() is needed when using rcu */
1751 if (atomic_inc_not_zero(&rt->rt6i_ref)) {
951f788a 1752 /* No dst_hold() on rt is needed because grabbing
a94b9367
WW
1753 * rt->rt6i_ref makes sure rt can't be released.
1754 */
a94b9367
WW
1755 pcpu_rt = rt6_make_pcpu_route(rt);
1756 rt6_release(rt);
1757 } else {
1758 /* rt is already removed from tree */
a94b9367
WW
1759 pcpu_rt = net->ipv6.ip6_null_entry;
1760 dst_hold(&pcpu_rt->dst);
1761 }
9c7370a1 1762 }
951f788a
ED
1763 local_bh_enable();
1764 rcu_read_unlock();
b65f164d 1765 trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
d52d3997
MKL
1766 return pcpu_rt;
1767 }
1da177e4 1768}
9ff74384 1769EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1770
8ed67789 1771static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 1772 struct flowi6 *fl6, int flags)
4acad72d 1773{
4c9483b2 1774 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
1775}
1776
d409b847
MB
1777struct dst_entry *ip6_route_input_lookup(struct net *net,
1778 struct net_device *dev,
1779 struct flowi6 *fl6, int flags)
72331bc0
SL
1780{
1781 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1782 flags |= RT6_LOOKUP_F_IFACE;
1783
1784 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1785}
d409b847 1786EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1787
23aebdac
JS
1788static void ip6_multipath_l3_keys(const struct sk_buff *skb,
1789 struct flow_keys *keys)
1790{
1791 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1792 const struct ipv6hdr *key_iph = outer_iph;
1793 const struct ipv6hdr *inner_iph;
1794 const struct icmp6hdr *icmph;
1795 struct ipv6hdr _inner_iph;
1796
1797 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1798 goto out;
1799
1800 icmph = icmp6_hdr(skb);
1801 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1802 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1803 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1804 icmph->icmp6_type != ICMPV6_PARAMPROB)
1805 goto out;
1806
1807 inner_iph = skb_header_pointer(skb,
1808 skb_transport_offset(skb) + sizeof(*icmph),
1809 sizeof(_inner_iph), &_inner_iph);
1810 if (!inner_iph)
1811 goto out;
1812
1813 key_iph = inner_iph;
1814out:
1815 memset(keys, 0, sizeof(*keys));
1816 keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1817 keys->addrs.v6addrs.src = key_iph->saddr;
1818 keys->addrs.v6addrs.dst = key_iph->daddr;
1819 keys->tags.flow_label = ip6_flowinfo(key_iph);
1820 keys->basic.ip_proto = key_iph->nexthdr;
1821}
1822
1823/* if skb is set it will be used and fl6 can be NULL */
1824u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
1825{
1826 struct flow_keys hash_keys;
1827
1828 if (skb) {
1829 ip6_multipath_l3_keys(skb, &hash_keys);
7696c06a 1830 return flow_hash_from_keys(&hash_keys) >> 1;
23aebdac
JS
1831 }
1832
7696c06a 1833 return get_hash_from_flowi6(fl6) >> 1;
23aebdac
JS
1834}
1835
c71099ac
TG
1836void ip6_route_input(struct sk_buff *skb)
1837{
b71d1d42 1838 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1839 struct net *net = dev_net(skb->dev);
adaa70bb 1840 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 1841 struct ip_tunnel_info *tun_info;
4c9483b2 1842 struct flowi6 fl6 = {
e0d56fdd 1843 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
1844 .daddr = iph->daddr,
1845 .saddr = iph->saddr,
6502ca52 1846 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1847 .flowi6_mark = skb->mark,
1848 .flowi6_proto = iph->nexthdr,
c71099ac 1849 };
adaa70bb 1850
904af04d 1851 tun_info = skb_tunnel_info(skb);
46fa062a 1852 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 1853 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
23aebdac
JS
1854 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
1855 fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
06e9d040 1856 skb_dst_drop(skb);
72331bc0 1857 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1858}
1859
8ed67789 1860static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1861 struct flowi6 *fl6, int flags)
1da177e4 1862{
4c9483b2 1863 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1864}
1865
6f21c96a
PA
1866struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1867 struct flowi6 *fl6, int flags)
c71099ac 1868{
d46a9d67 1869 bool any_src;
c71099ac 1870
4c1feac5
DA
1871 if (rt6_need_strict(&fl6->daddr)) {
1872 struct dst_entry *dst;
1873
1874 dst = l3mdev_link_scope_lookup(net, fl6);
1875 if (dst)
1876 return dst;
1877 }
ca254490 1878
1fb9489b 1879 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1880
d46a9d67 1881 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 1882 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 1883 (fl6->flowi6_oif && any_src))
77d16f45 1884 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1885
d46a9d67 1886 if (!any_src)
adaa70bb 1887 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1888 else if (sk)
1889 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1890
4c9483b2 1891 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4 1892}
6f21c96a 1893EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 1894
2774c131 1895struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1896{
5c1e6aa3 1897 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 1898 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
1899 struct dst_entry *new = NULL;
1900
1dbe3252 1901 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 1902 DST_OBSOLETE_DEAD, 0);
14e50e57 1903 if (rt) {
0a1f5962 1904 rt6_info_init(rt);
81eb8447 1905 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 1906
0a1f5962 1907 new = &rt->dst;
14e50e57 1908 new->__use = 1;
352e512c 1909 new->input = dst_discard;
ede2059d 1910 new->output = dst_discard_out;
14e50e57 1911
0a1f5962 1912 dst_copy_metrics(new, &ort->dst);
14e50e57 1913
1dbe3252 1914 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 1915 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 1916 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
1917 rt->rt6i_metric = 0;
1918
1919 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1920#ifdef CONFIG_IPV6_SUBTREES
1921 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1922#endif
14e50e57
DM
1923 }
1924
69ead7af
DM
1925 dst_release(dst_orig);
1926 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1927}
14e50e57 1928
1da177e4
LT
1929/*
1930 * Destination cache support functions
1931 */
1932
4b32b5ad
MKL
1933static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1934{
3a2232e9
DM
1935 if (rt->from &&
1936 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(&rt->from->dst))
1937 dst_init_metrics(&rt->dst, dst_metrics_ptr(&rt->from->dst), true);
4b32b5ad
MKL
1938}
1939
3da59bd9
MKL
1940static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1941{
36143645 1942 u32 rt_cookie = 0;
c5cff856
WW
1943
1944 if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
3da59bd9
MKL
1945 return NULL;
1946
1947 if (rt6_check_expired(rt))
1948 return NULL;
1949
1950 return &rt->dst;
1951}
1952
1953static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1954{
5973fb1e
MKL
1955 if (!__rt6_check_expired(rt) &&
1956 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
3a2232e9 1957 rt6_check(rt->from, cookie))
3da59bd9
MKL
1958 return &rt->dst;
1959 else
1960 return NULL;
1961}
1962
1da177e4
LT
1963static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1964{
1965 struct rt6_info *rt;
1966
1967 rt = (struct rt6_info *) dst;
1968
6f3118b5
ND
1969 /* All IPV6 dsts are created with ->obsolete set to the value
1970 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1971 * into this function always.
1972 */
e3bc10bd 1973
4b32b5ad
MKL
1974 rt6_dst_from_metrics_check(rt);
1975
02bcf4e0 1976 if (rt->rt6i_flags & RTF_PCPU ||
3a2232e9 1977 (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
3da59bd9
MKL
1978 return rt6_dst_from_check(rt, cookie);
1979 else
1980 return rt6_check(rt, cookie);
1da177e4
LT
1981}
1982
1983static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1984{
1985 struct rt6_info *rt = (struct rt6_info *) dst;
1986
1987 if (rt) {
54c1a859
YH
1988 if (rt->rt6i_flags & RTF_CACHE) {
1989 if (rt6_check_expired(rt)) {
1990 ip6_del_rt(rt);
1991 dst = NULL;
1992 }
1993 } else {
1da177e4 1994 dst_release(dst);
54c1a859
YH
1995 dst = NULL;
1996 }
1da177e4 1997 }
54c1a859 1998 return dst;
1da177e4
LT
1999}
2000
2001static void ip6_link_failure(struct sk_buff *skb)
2002{
2003 struct rt6_info *rt;
2004
3ffe533c 2005 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2006
adf30907 2007 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2008 if (rt) {
1eb4f758 2009 if (rt->rt6i_flags & RTF_CACHE) {
ad65a2f0
WW
2010 if (dst_hold_safe(&rt->dst))
2011 ip6_del_rt(rt);
c5cff856
WW
2012 } else {
2013 struct fib6_node *fn;
2014
2015 rcu_read_lock();
2016 fn = rcu_dereference(rt->rt6i_node);
2017 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2018 fn->fn_sernum = -1;
2019 rcu_read_unlock();
1eb4f758 2020 }
1da177e4
LT
2021 }
2022}
2023
45e4fd26
MKL
2024static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2025{
2026 struct net *net = dev_net(rt->dst.dev);
2027
2028 rt->rt6i_flags |= RTF_MODIFIED;
2029 rt->rt6i_pmtu = mtu;
2030 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2031}
2032
0d3f6d29
MKL
2033static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2034{
2035 return !(rt->rt6i_flags & RTF_CACHE) &&
4e587ea7
WW
2036 (rt->rt6i_flags & RTF_PCPU ||
2037 rcu_access_pointer(rt->rt6i_node));
0d3f6d29
MKL
2038}
2039
45e4fd26
MKL
2040static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2041 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2042{
0dec879f 2043 const struct in6_addr *daddr, *saddr;
67ba4152 2044 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2045
45e4fd26
MKL
2046 if (rt6->rt6i_flags & RTF_LOCAL)
2047 return;
81aded24 2048
19bda36c
XL
2049 if (dst_metric_locked(dst, RTAX_MTU))
2050 return;
2051
0dec879f
JA
2052 if (iph) {
2053 daddr = &iph->daddr;
2054 saddr = &iph->saddr;
2055 } else if (sk) {
2056 daddr = &sk->sk_v6_daddr;
2057 saddr = &inet6_sk(sk)->saddr;
2058 } else {
2059 daddr = NULL;
2060 saddr = NULL;
2061 }
2062 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2063 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2064 if (mtu >= dst_mtu(dst))
2065 return;
9d289715 2066
0d3f6d29 2067 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2068 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2069 /* update rt6_ex->stamp for cache */
2070 if (rt6->rt6i_flags & RTF_CACHE)
2071 rt6_update_exception_stamp_rt(rt6);
0dec879f 2072 } else if (daddr) {
45e4fd26
MKL
2073 struct rt6_info *nrt6;
2074
45e4fd26
MKL
2075 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
2076 if (nrt6) {
2077 rt6_do_update_pmtu(nrt6, mtu);
2b760fcf
WW
2078 if (rt6_insert_exception(nrt6, rt6))
2079 dst_release_immediate(&nrt6->dst);
45e4fd26 2080 }
1da177e4
LT
2081 }
2082}
2083
45e4fd26
MKL
2084static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2085 struct sk_buff *skb, u32 mtu)
2086{
2087 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2088}
2089
42ae66c8 2090void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2091 int oif, u32 mark, kuid_t uid)
81aded24
DM
2092{
2093 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2094 struct dst_entry *dst;
2095 struct flowi6 fl6;
2096
2097 memset(&fl6, 0, sizeof(fl6));
2098 fl6.flowi6_oif = oif;
1b3c61dc 2099 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
2100 fl6.daddr = iph->daddr;
2101 fl6.saddr = iph->saddr;
6502ca52 2102 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2103 fl6.flowi6_uid = uid;
81aded24
DM
2104
2105 dst = ip6_route_output(net, NULL, &fl6);
2106 if (!dst->error)
45e4fd26 2107 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2108 dst_release(dst);
2109}
2110EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2111
2112void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2113{
33c162a9
MKL
2114 struct dst_entry *dst;
2115
81aded24 2116 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 2117 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2118
2119 dst = __sk_dst_get(sk);
2120 if (!dst || !dst->obsolete ||
2121 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2122 return;
2123
2124 bh_lock_sock(sk);
2125 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2126 ip6_datagram_dst_update(sk, false);
2127 bh_unlock_sock(sk);
81aded24
DM
2128}
2129EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2130
b55b76b2
DJ
2131/* Handle redirects */
2132struct ip6rd_flowi {
2133 struct flowi6 fl6;
2134 struct in6_addr gateway;
2135};
2136
2137static struct rt6_info *__ip6_route_redirect(struct net *net,
2138 struct fib6_table *table,
2139 struct flowi6 *fl6,
2140 int flags)
2141{
2142 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2b760fcf 2143 struct rt6_info *rt, *rt_cache;
b55b76b2
DJ
2144 struct fib6_node *fn;
2145
2146 /* Get the "current" route for this destination and
67c408cf 2147 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2148 *
2149 * RFC 4861 specifies that redirects should only be
2150 * accepted if they come from the nexthop to the target.
2151 * Due to the way the routes are chosen, this notion
2152 * is a bit fuzzy and one might need to check all possible
2153 * routes.
2154 */
2155
66f5d6ce 2156 rcu_read_lock();
b55b76b2
DJ
2157 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2158restart:
66f5d6ce 2159 for_each_fib6_node_rt_rcu(fn) {
8067bb8c
IS
2160 if (rt->rt6i_nh_flags & RTNH_F_DEAD)
2161 continue;
b55b76b2
DJ
2162 if (rt6_check_expired(rt))
2163 continue;
2164 if (rt->dst.error)
2165 break;
2166 if (!(rt->rt6i_flags & RTF_GATEWAY))
2167 continue;
2168 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
2169 continue;
2b760fcf
WW
2170 /* rt_cache's gateway might be different from its 'parent'
2171 * in the case of an ip redirect.
2172 * So we keep searching in the exception table if the gateway
2173 * is different.
2174 */
2175 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) {
2176 rt_cache = rt6_find_cached_rt(rt,
2177 &fl6->daddr,
2178 &fl6->saddr);
2179 if (rt_cache &&
2180 ipv6_addr_equal(&rdfl->gateway,
2181 &rt_cache->rt6i_gateway)) {
2182 rt = rt_cache;
2183 break;
2184 }
b55b76b2 2185 continue;
2b760fcf 2186 }
b55b76b2
DJ
2187 break;
2188 }
2189
2190 if (!rt)
2191 rt = net->ipv6.ip6_null_entry;
2192 else if (rt->dst.error) {
2193 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2194 goto out;
2195 }
2196
2197 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
2198 fn = fib6_backtrack(fn, &fl6->saddr);
2199 if (fn)
2200 goto restart;
b55b76b2 2201 }
a3c00e46 2202
b0a1ba59 2203out:
d3843fe5 2204 ip6_hold_safe(net, &rt, true);
b55b76b2 2205
66f5d6ce 2206 rcu_read_unlock();
b55b76b2 2207
b65f164d 2208 trace_fib6_table_lookup(net, rt, table, fl6);
b55b76b2
DJ
2209 return rt;
2210};
2211
2212static struct dst_entry *ip6_route_redirect(struct net *net,
2213 const struct flowi6 *fl6,
2214 const struct in6_addr *gateway)
2215{
2216 int flags = RT6_LOOKUP_F_HAS_SADDR;
2217 struct ip6rd_flowi rdfl;
2218
2219 rdfl.fl6 = *fl6;
2220 rdfl.gateway = *gateway;
2221
2222 return fib6_rule_lookup(net, &rdfl.fl6,
2223 flags, __ip6_route_redirect);
2224}
2225
e2d118a1
LC
2226void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2227 kuid_t uid)
3a5ad2ee
DM
2228{
2229 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2230 struct dst_entry *dst;
2231 struct flowi6 fl6;
2232
2233 memset(&fl6, 0, sizeof(fl6));
e374c618 2234 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
2235 fl6.flowi6_oif = oif;
2236 fl6.flowi6_mark = mark;
3a5ad2ee
DM
2237 fl6.daddr = iph->daddr;
2238 fl6.saddr = iph->saddr;
6502ca52 2239 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2240 fl6.flowi6_uid = uid;
3a5ad2ee 2241
b55b76b2
DJ
2242 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
2243 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2244 dst_release(dst);
2245}
2246EXPORT_SYMBOL_GPL(ip6_redirect);
2247
c92a59ec
DJ
2248void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2249 u32 mark)
2250{
2251 const struct ipv6hdr *iph = ipv6_hdr(skb);
2252 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2253 struct dst_entry *dst;
2254 struct flowi6 fl6;
2255
2256 memset(&fl6, 0, sizeof(fl6));
e374c618 2257 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
2258 fl6.flowi6_oif = oif;
2259 fl6.flowi6_mark = mark;
c92a59ec
DJ
2260 fl6.daddr = msg->dest;
2261 fl6.saddr = iph->daddr;
e2d118a1 2262 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 2263
b55b76b2
DJ
2264 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
2265 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2266 dst_release(dst);
2267}
2268
3a5ad2ee
DM
2269void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2270{
e2d118a1
LC
2271 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2272 sk->sk_uid);
3a5ad2ee
DM
2273}
2274EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2275
0dbaee3b 2276static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2277{
0dbaee3b
DM
2278 struct net_device *dev = dst->dev;
2279 unsigned int mtu = dst_mtu(dst);
2280 struct net *net = dev_net(dev);
2281
1da177e4
LT
2282 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2283
5578689a
DL
2284 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2285 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2286
2287 /*
1ab1457c
YH
2288 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2289 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2290 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2291 * rely only on pmtu discovery"
2292 */
2293 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2294 mtu = IPV6_MAXPLEN;
2295 return mtu;
2296}
2297
ebb762f2 2298static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2299{
4b32b5ad
MKL
2300 const struct rt6_info *rt = (const struct rt6_info *)dst;
2301 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 2302 struct inet6_dev *idev;
618f9bc7 2303
4b32b5ad
MKL
2304 if (mtu)
2305 goto out;
2306
2307 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2308 if (mtu)
30f78d8e 2309 goto out;
618f9bc7
SK
2310
2311 mtu = IPV6_MIN_MTU;
d33e4553
DM
2312
2313 rcu_read_lock();
2314 idev = __in6_dev_get(dst->dev);
2315 if (idev)
2316 mtu = idev->cnf.mtu6;
2317 rcu_read_unlock();
2318
30f78d8e 2319out:
14972cbd
RP
2320 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2321
2322 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2323}
2324
3b00944c 2325struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2326 struct flowi6 *fl6)
1da177e4 2327{
87a11578 2328 struct dst_entry *dst;
1da177e4
LT
2329 struct rt6_info *rt;
2330 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2331 struct net *net = dev_net(dev);
1da177e4 2332
38308473 2333 if (unlikely(!idev))
122bdf67 2334 return ERR_PTR(-ENODEV);
1da177e4 2335
ad706862 2336 rt = ip6_dst_alloc(net, dev, 0);
38308473 2337 if (unlikely(!rt)) {
1da177e4 2338 in6_dev_put(idev);
87a11578 2339 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2340 goto out;
2341 }
2342
8e2ec639 2343 rt->dst.flags |= DST_HOST;
588753f1 2344 rt->dst.input = ip6_input;
8e2ec639 2345 rt->dst.output = ip6_output;
550bab42 2346 rt->rt6i_gateway = fl6->daddr;
87a11578 2347 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2348 rt->rt6i_dst.plen = 128;
2349 rt->rt6i_idev = idev;
14edd87d 2350 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2351
4c981e28 2352 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2353 * do proper release of the net_device
2354 */
2355 rt6_uncached_list_add(rt);
81eb8447 2356 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2357
87a11578
DM
2358 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2359
1da177e4 2360out:
87a11578 2361 return dst;
1da177e4
LT
2362}
2363
569d3645 2364static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2365{
86393e52 2366 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2367 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2368 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2369 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2370 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2371 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2372 int entries;
7019b78e 2373
fc66f95c 2374 entries = dst_entries_get_fast(ops);
49a18d86 2375 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2376 entries <= rt_max_size)
1da177e4
LT
2377 goto out;
2378
6891a346 2379 net->ipv6.ip6_rt_gc_expire++;
14956643 2380 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2381 entries = dst_entries_get_slow(ops);
2382 if (entries < ops->gc_thresh)
7019b78e 2383 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2384out:
7019b78e 2385 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2386 return entries > rt_max_size;
1da177e4
LT
2387}
2388
e715b6d3
FW
2389static int ip6_convert_metrics(struct mx6_config *mxc,
2390 const struct fib6_config *cfg)
2391{
6670e152 2392 struct net *net = cfg->fc_nlinfo.nl_net;
c3a8d947 2393 bool ecn_ca = false;
e715b6d3
FW
2394 struct nlattr *nla;
2395 int remaining;
2396 u32 *mp;
2397
63159f29 2398 if (!cfg->fc_mx)
e715b6d3
FW
2399 return 0;
2400
2401 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
2402 if (unlikely(!mp))
2403 return -ENOMEM;
2404
2405 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
2406 int type = nla_type(nla);
1bb14807 2407 u32 val;
e715b6d3 2408
1bb14807
DB
2409 if (!type)
2410 continue;
2411 if (unlikely(type > RTAX_MAX))
2412 goto err;
ea697639 2413
1bb14807
DB
2414 if (type == RTAX_CC_ALGO) {
2415 char tmp[TCP_CA_NAME_MAX];
e715b6d3 2416
1bb14807 2417 nla_strlcpy(tmp, nla, sizeof(tmp));
6670e152 2418 val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca);
1bb14807
DB
2419 if (val == TCP_CA_UNSPEC)
2420 goto err;
2421 } else {
2422 val = nla_get_u32(nla);
e715b6d3 2423 }
626abd59
PA
2424 if (type == RTAX_HOPLIMIT && val > 255)
2425 val = 255;
b8d3e416
DB
2426 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
2427 goto err;
1bb14807
DB
2428
2429 mp[type - 1] = val;
2430 __set_bit(type - 1, mxc->mx_valid);
e715b6d3
FW
2431 }
2432
c3a8d947
DB
2433 if (ecn_ca) {
2434 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
2435 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
2436 }
e715b6d3 2437
c3a8d947 2438 mxc->mx = mp;
e715b6d3
FW
2439 return 0;
2440 err:
2441 kfree(mp);
2442 return -EINVAL;
2443}
1da177e4 2444
8c14586f
DA
2445static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2446 struct fib6_config *cfg,
f4797b33
DA
2447 const struct in6_addr *gw_addr,
2448 u32 tbid, int flags)
8c14586f
DA
2449{
2450 struct flowi6 fl6 = {
2451 .flowi6_oif = cfg->fc_ifindex,
2452 .daddr = *gw_addr,
2453 .saddr = cfg->fc_prefsrc,
2454 };
2455 struct fib6_table *table;
2456 struct rt6_info *rt;
8c14586f 2457
f4797b33 2458 table = fib6_get_table(net, tbid);
8c14586f
DA
2459 if (!table)
2460 return NULL;
2461
2462 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2463 flags |= RT6_LOOKUP_F_HAS_SADDR;
2464
f4797b33 2465 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
8c14586f
DA
2466 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
2467
2468 /* if table lookup failed, fall back to full lookup */
2469 if (rt == net->ipv6.ip6_null_entry) {
2470 ip6_rt_put(rt);
2471 rt = NULL;
2472 }
2473
2474 return rt;
2475}
2476
fc1e64e1
DA
2477static int ip6_route_check_nh_onlink(struct net *net,
2478 struct fib6_config *cfg,
2479 struct net_device *dev,
2480 struct netlink_ext_ack *extack)
2481{
2482 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
2483 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2484 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2485 struct rt6_info *grt;
2486 int err;
2487
2488 err = 0;
2489 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2490 if (grt) {
2491 if (grt->rt6i_flags & flags || dev != grt->dst.dev) {
2492 NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
2493 err = -EINVAL;
2494 }
2495
2496 ip6_rt_put(grt);
2497 }
2498
2499 return err;
2500}
2501
1edce99f
DA
2502static int ip6_route_check_nh(struct net *net,
2503 struct fib6_config *cfg,
2504 struct net_device **_dev,
2505 struct inet6_dev **idev)
2506{
2507 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2508 struct net_device *dev = _dev ? *_dev : NULL;
2509 struct rt6_info *grt = NULL;
2510 int err = -EHOSTUNREACH;
2511
2512 if (cfg->fc_table) {
f4797b33
DA
2513 int flags = RT6_LOOKUP_F_IFACE;
2514
2515 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2516 cfg->fc_table, flags);
1edce99f
DA
2517 if (grt) {
2518 if (grt->rt6i_flags & RTF_GATEWAY ||
2519 (dev && dev != grt->dst.dev)) {
2520 ip6_rt_put(grt);
2521 grt = NULL;
2522 }
2523 }
2524 }
2525
2526 if (!grt)
2527 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
2528
2529 if (!grt)
2530 goto out;
2531
2532 if (dev) {
2533 if (dev != grt->dst.dev) {
2534 ip6_rt_put(grt);
2535 goto out;
2536 }
2537 } else {
2538 *_dev = dev = grt->dst.dev;
2539 *idev = grt->rt6i_idev;
2540 dev_hold(dev);
2541 in6_dev_hold(grt->rt6i_idev);
2542 }
2543
2544 if (!(grt->rt6i_flags & RTF_GATEWAY))
2545 err = 0;
2546
2547 ip6_rt_put(grt);
2548
2549out:
2550 return err;
2551}
2552
333c4301
DA
2553static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
2554 struct netlink_ext_ack *extack)
1da177e4 2555{
5578689a 2556 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
2557 struct rt6_info *rt = NULL;
2558 struct net_device *dev = NULL;
2559 struct inet6_dev *idev = NULL;
c71099ac 2560 struct fib6_table *table;
1da177e4 2561 int addr_type;
8c5b83f0 2562 int err = -EINVAL;
1da177e4 2563
557c44be 2564 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
2565 if (cfg->fc_flags & RTF_PCPU) {
2566 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 2567 goto out;
d5d531cb 2568 }
557c44be 2569
2ea2352e
WW
2570 /* RTF_CACHE is an internal flag; can not be set by userspace */
2571 if (cfg->fc_flags & RTF_CACHE) {
2572 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2573 goto out;
2574 }
2575
d5d531cb
DA
2576 if (cfg->fc_dst_len > 128) {
2577 NL_SET_ERR_MSG(extack, "Invalid prefix length");
2578 goto out;
2579 }
2580 if (cfg->fc_src_len > 128) {
2581 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 2582 goto out;
d5d531cb 2583 }
1da177e4 2584#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
2585 if (cfg->fc_src_len) {
2586 NL_SET_ERR_MSG(extack,
2587 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 2588 goto out;
d5d531cb 2589 }
1da177e4 2590#endif
86872cb5 2591 if (cfg->fc_ifindex) {
1da177e4 2592 err = -ENODEV;
5578689a 2593 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
2594 if (!dev)
2595 goto out;
2596 idev = in6_dev_get(dev);
2597 if (!idev)
2598 goto out;
2599 }
2600
86872cb5
TG
2601 if (cfg->fc_metric == 0)
2602 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 2603
fc1e64e1
DA
2604 if (cfg->fc_flags & RTNH_F_ONLINK) {
2605 if (!dev) {
2606 NL_SET_ERR_MSG(extack,
2607 "Nexthop device required for onlink");
2608 err = -ENODEV;
2609 goto out;
2610 }
2611
2612 if (!(dev->flags & IFF_UP)) {
2613 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2614 err = -ENETDOWN;
2615 goto out;
2616 }
2617 }
2618
d71314b4 2619 err = -ENOBUFS;
38308473
DM
2620 if (cfg->fc_nlinfo.nlh &&
2621 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 2622 table = fib6_get_table(net, cfg->fc_table);
38308473 2623 if (!table) {
f3213831 2624 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
2625 table = fib6_new_table(net, cfg->fc_table);
2626 }
2627 } else {
2628 table = fib6_new_table(net, cfg->fc_table);
2629 }
38308473
DM
2630
2631 if (!table)
c71099ac 2632 goto out;
c71099ac 2633
ad706862
MKL
2634 rt = ip6_dst_alloc(net, NULL,
2635 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1da177e4 2636
38308473 2637 if (!rt) {
1da177e4
LT
2638 err = -ENOMEM;
2639 goto out;
2640 }
2641
1716a961
G
2642 if (cfg->fc_flags & RTF_EXPIRES)
2643 rt6_set_expires(rt, jiffies +
2644 clock_t_to_jiffies(cfg->fc_expires));
2645 else
2646 rt6_clean_expires(rt);
1da177e4 2647
86872cb5
TG
2648 if (cfg->fc_protocol == RTPROT_UNSPEC)
2649 cfg->fc_protocol = RTPROT_BOOT;
2650 rt->rt6i_protocol = cfg->fc_protocol;
2651
2652 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
2653
2654 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 2655 rt->dst.input = ip6_mc_input;
ab79ad14
2656 else if (cfg->fc_flags & RTF_LOCAL)
2657 rt->dst.input = ip6_input;
1da177e4 2658 else
d8d1f30b 2659 rt->dst.input = ip6_forward;
1da177e4 2660
d8d1f30b 2661 rt->dst.output = ip6_output;
1da177e4 2662
19e42e45
RP
2663 if (cfg->fc_encap) {
2664 struct lwtunnel_state *lwtstate;
2665
30357d7d 2666 err = lwtunnel_build_state(cfg->fc_encap_type,
127eb7cd 2667 cfg->fc_encap, AF_INET6, cfg,
9ae28727 2668 &lwtstate, extack);
19e42e45
RP
2669 if (err)
2670 goto out;
61adedf3
JB
2671 rt->dst.lwtstate = lwtstate_get(lwtstate);
2672 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
2673 rt->dst.lwtstate->orig_output = rt->dst.output;
2674 rt->dst.output = lwtunnel_output;
25368623 2675 }
61adedf3
JB
2676 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
2677 rt->dst.lwtstate->orig_input = rt->dst.input;
2678 rt->dst.input = lwtunnel_input;
25368623 2679 }
19e42e45
RP
2680 }
2681
86872cb5
TG
2682 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
2683 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 2684 if (rt->rt6i_dst.plen == 128)
e5fd387a 2685 rt->dst.flags |= DST_HOST;
e5fd387a 2686
1da177e4 2687#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
2688 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
2689 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
2690#endif
2691
86872cb5 2692 rt->rt6i_metric = cfg->fc_metric;
398958ae 2693 rt->rt6i_nh_weight = 1;
1da177e4
LT
2694
2695 /* We cannot add true routes via loopback here,
2696 they would result in kernel looping; promote them to reject routes
2697 */
86872cb5 2698 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
2699 (dev && (dev->flags & IFF_LOOPBACK) &&
2700 !(addr_type & IPV6_ADDR_LOOPBACK) &&
2701 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 2702 /* hold loopback dev/idev if we haven't done so. */
5578689a 2703 if (dev != net->loopback_dev) {
1da177e4
LT
2704 if (dev) {
2705 dev_put(dev);
2706 in6_dev_put(idev);
2707 }
5578689a 2708 dev = net->loopback_dev;
1da177e4
LT
2709 dev_hold(dev);
2710 idev = in6_dev_get(dev);
2711 if (!idev) {
2712 err = -ENODEV;
2713 goto out;
2714 }
2715 }
1da177e4 2716 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
2717 switch (cfg->fc_type) {
2718 case RTN_BLACKHOLE:
2719 rt->dst.error = -EINVAL;
ede2059d 2720 rt->dst.output = dst_discard_out;
7150aede 2721 rt->dst.input = dst_discard;
ef2c7d7b
ND
2722 break;
2723 case RTN_PROHIBIT:
2724 rt->dst.error = -EACCES;
7150aede
K
2725 rt->dst.output = ip6_pkt_prohibit_out;
2726 rt->dst.input = ip6_pkt_prohibit;
ef2c7d7b 2727 break;
b4949ab2 2728 case RTN_THROW:
0315e382 2729 case RTN_UNREACHABLE:
ef2c7d7b 2730 default:
7150aede 2731 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
0315e382
NF
2732 : (cfg->fc_type == RTN_UNREACHABLE)
2733 ? -EHOSTUNREACH : -ENETUNREACH;
7150aede
K
2734 rt->dst.output = ip6_pkt_discard_out;
2735 rt->dst.input = ip6_pkt_discard;
ef2c7d7b
ND
2736 break;
2737 }
1da177e4
LT
2738 goto install_route;
2739 }
2740
86872cb5 2741 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 2742 const struct in6_addr *gw_addr;
1da177e4
LT
2743 int gwa_type;
2744
86872cb5 2745 gw_addr = &cfg->fc_gateway;
330567b7 2746 gwa_type = ipv6_addr_type(gw_addr);
48ed7b26
FW
2747
2748 /* if gw_addr is local we will fail to detect this in case
2749 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2750 * will return already-added prefix route via interface that
2751 * prefix route was assigned to, which might be non-loopback.
2752 */
2753 err = -EINVAL;
330567b7
FW
2754 if (ipv6_chk_addr_and_flags(net, gw_addr,
2755 gwa_type & IPV6_ADDR_LINKLOCAL ?
d5d531cb
DA
2756 dev : NULL, 0, 0)) {
2757 NL_SET_ERR_MSG(extack, "Invalid gateway address");
48ed7b26 2758 goto out;
d5d531cb 2759 }
4e3fd7a0 2760 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
2761
2762 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1da177e4
LT
2763 /* IPv6 strictly inhibits using not link-local
2764 addresses as nexthop address.
2765 Otherwise, router will not able to send redirects.
2766 It is very good, but in some (rare!) circumstances
2767 (SIT, PtP, NBMA NOARP links) it is handy to allow
2768 some exceptions. --ANK
96d5822c
EN
2769 We allow IPv4-mapped nexthops to support RFC4798-type
2770 addressing
1da177e4 2771 */
96d5822c 2772 if (!(gwa_type & (IPV6_ADDR_UNICAST |
d5d531cb
DA
2773 IPV6_ADDR_MAPPED))) {
2774 NL_SET_ERR_MSG(extack,
2775 "Invalid gateway address");
1da177e4 2776 goto out;
d5d531cb 2777 }
1da177e4 2778
fc1e64e1
DA
2779 if (cfg->fc_flags & RTNH_F_ONLINK) {
2780 err = ip6_route_check_nh_onlink(net, cfg, dev,
2781 extack);
2782 } else {
2783 err = ip6_route_check_nh(net, cfg, &dev, &idev);
2784 }
1da177e4
LT
2785 if (err)
2786 goto out;
2787 }
2788 err = -EINVAL;
d5d531cb
DA
2789 if (!dev) {
2790 NL_SET_ERR_MSG(extack, "Egress device not specified");
2791 goto out;
2792 } else if (dev->flags & IFF_LOOPBACK) {
2793 NL_SET_ERR_MSG(extack,
2794 "Egress device can not be loopback device for this route");
1da177e4 2795 goto out;
d5d531cb 2796 }
1da177e4
LT
2797 }
2798
2799 err = -ENODEV;
38308473 2800 if (!dev)
1da177e4
LT
2801 goto out;
2802
955ec4cb
DA
2803 if (!(dev->flags & IFF_UP)) {
2804 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2805 err = -ENETDOWN;
2806 goto out;
2807 }
2808
c3968a85
DW
2809 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2810 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 2811 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
2812 err = -EINVAL;
2813 goto out;
2814 }
4e3fd7a0 2815 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
2816 rt->rt6i_prefsrc.plen = 128;
2817 } else
2818 rt->rt6i_prefsrc.plen = 0;
2819
86872cb5 2820 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
2821
2822install_route:
5609b80a
IS
2823 if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
2824 !netif_carrier_ok(dev))
2825 rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
fc1e64e1 2826 rt->rt6i_nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
d8d1f30b 2827 rt->dst.dev = dev;
1da177e4 2828 rt->rt6i_idev = idev;
c71099ac 2829 rt->rt6i_table = table;
63152fc0 2830
c346dca1 2831 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 2832
8c5b83f0 2833 return rt;
6b9ea5a6
RP
2834out:
2835 if (dev)
2836 dev_put(dev);
2837 if (idev)
2838 in6_dev_put(idev);
587fea74
WW
2839 if (rt)
2840 dst_release_immediate(&rt->dst);
6b9ea5a6 2841
8c5b83f0 2842 return ERR_PTR(err);
6b9ea5a6
RP
2843}
2844
333c4301
DA
2845int ip6_route_add(struct fib6_config *cfg,
2846 struct netlink_ext_ack *extack)
6b9ea5a6
RP
2847{
2848 struct mx6_config mxc = { .mx = NULL, };
8c5b83f0 2849 struct rt6_info *rt;
6b9ea5a6
RP
2850 int err;
2851
333c4301 2852 rt = ip6_route_info_create(cfg, extack);
8c5b83f0
RP
2853 if (IS_ERR(rt)) {
2854 err = PTR_ERR(rt);
2855 rt = NULL;
6b9ea5a6 2856 goto out;
8c5b83f0 2857 }
6b9ea5a6 2858
e715b6d3
FW
2859 err = ip6_convert_metrics(&mxc, cfg);
2860 if (err)
2861 goto out;
1da177e4 2862
333c4301 2863 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
e715b6d3
FW
2864
2865 kfree(mxc.mx);
6b9ea5a6 2866
e715b6d3 2867 return err;
1da177e4 2868out:
587fea74
WW
2869 if (rt)
2870 dst_release_immediate(&rt->dst);
6b9ea5a6 2871
1da177e4
LT
2872 return err;
2873}
2874
86872cb5 2875static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2876{
2877 int err;
c71099ac 2878 struct fib6_table *table;
d1918542 2879 struct net *net = dev_net(rt->dst.dev);
1da177e4 2880
a4c2fd7f 2881 if (rt == net->ipv6.ip6_null_entry) {
6825a26c
G
2882 err = -ENOENT;
2883 goto out;
2884 }
6c813a72 2885
c71099ac 2886 table = rt->rt6i_table;
66f5d6ce 2887 spin_lock_bh(&table->tb6_lock);
86872cb5 2888 err = fib6_del(rt, info);
66f5d6ce 2889 spin_unlock_bh(&table->tb6_lock);
1da177e4 2890
6825a26c 2891out:
94e187c0 2892 ip6_rt_put(rt);
1da177e4
LT
2893 return err;
2894}
2895
e0a1ad73
TG
2896int ip6_del_rt(struct rt6_info *rt)
2897{
4d1169c1 2898 struct nl_info info = {
d1918542 2899 .nl_net = dev_net(rt->dst.dev),
4d1169c1 2900 };
528c4ceb 2901 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
2902}
2903
0ae81335
DA
2904static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2905{
2906 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 2907 struct net *net = info->nl_net;
16a16cd3 2908 struct sk_buff *skb = NULL;
0ae81335 2909 struct fib6_table *table;
e3330039 2910 int err = -ENOENT;
0ae81335 2911
e3330039
WC
2912 if (rt == net->ipv6.ip6_null_entry)
2913 goto out_put;
0ae81335 2914 table = rt->rt6i_table;
66f5d6ce 2915 spin_lock_bh(&table->tb6_lock);
0ae81335
DA
2916
2917 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
2918 struct rt6_info *sibling, *next_sibling;
2919
16a16cd3
DA
2920 /* prefer to send a single notification with all hops */
2921 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
2922 if (skb) {
2923 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2924
e3330039 2925 if (rt6_fill_node(net, skb, rt,
16a16cd3
DA
2926 NULL, NULL, 0, RTM_DELROUTE,
2927 info->portid, seq, 0) < 0) {
2928 kfree_skb(skb);
2929 skb = NULL;
2930 } else
2931 info->skip_notify = 1;
2932 }
2933
0ae81335
DA
2934 list_for_each_entry_safe(sibling, next_sibling,
2935 &rt->rt6i_siblings,
2936 rt6i_siblings) {
2937 err = fib6_del(sibling, info);
2938 if (err)
e3330039 2939 goto out_unlock;
0ae81335
DA
2940 }
2941 }
2942
2943 err = fib6_del(rt, info);
e3330039 2944out_unlock:
66f5d6ce 2945 spin_unlock_bh(&table->tb6_lock);
e3330039 2946out_put:
0ae81335 2947 ip6_rt_put(rt);
16a16cd3
DA
2948
2949 if (skb) {
e3330039 2950 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
2951 info->nlh, gfp_any());
2952 }
0ae81335
DA
2953 return err;
2954}
2955
333c4301
DA
2956static int ip6_route_del(struct fib6_config *cfg,
2957 struct netlink_ext_ack *extack)
1da177e4 2958{
2b760fcf 2959 struct rt6_info *rt, *rt_cache;
c71099ac 2960 struct fib6_table *table;
1da177e4 2961 struct fib6_node *fn;
1da177e4
LT
2962 int err = -ESRCH;
2963
5578689a 2964 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
2965 if (!table) {
2966 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 2967 return err;
d5d531cb 2968 }
c71099ac 2969
66f5d6ce 2970 rcu_read_lock();
1da177e4 2971
c71099ac 2972 fn = fib6_locate(&table->tb6_root,
86872cb5 2973 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 2974 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 2975 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 2976
1da177e4 2977 if (fn) {
66f5d6ce 2978 for_each_fib6_node_rt_rcu(fn) {
2b760fcf
WW
2979 if (cfg->fc_flags & RTF_CACHE) {
2980 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
2981 &cfg->fc_src);
2982 if (!rt_cache)
2983 continue;
2984 rt = rt_cache;
2985 }
86872cb5 2986 if (cfg->fc_ifindex &&
d1918542
DM
2987 (!rt->dst.dev ||
2988 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 2989 continue;
86872cb5
TG
2990 if (cfg->fc_flags & RTF_GATEWAY &&
2991 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 2992 continue;
86872cb5 2993 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 2994 continue;
c2ed1880
M
2995 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
2996 continue;
d3843fe5
WW
2997 if (!dst_hold_safe(&rt->dst))
2998 break;
66f5d6ce 2999 rcu_read_unlock();
1da177e4 3000
0ae81335
DA
3001 /* if gateway was specified only delete the one hop */
3002 if (cfg->fc_flags & RTF_GATEWAY)
3003 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3004
3005 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3006 }
3007 }
66f5d6ce 3008 rcu_read_unlock();
1da177e4
LT
3009
3010 return err;
3011}
3012
6700c270 3013static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 3014{
a6279458 3015 struct netevent_redirect netevent;
e8599ff4 3016 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
3017 struct ndisc_options ndopts;
3018 struct inet6_dev *in6_dev;
3019 struct neighbour *neigh;
71bcdba0 3020 struct rd_msg *msg;
6e157b6a
DM
3021 int optlen, on_link;
3022 u8 *lladdr;
e8599ff4 3023
29a3cad5 3024 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 3025 optlen -= sizeof(*msg);
e8599ff4
DM
3026
3027 if (optlen < 0) {
6e157b6a 3028 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
3029 return;
3030 }
3031
71bcdba0 3032 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 3033
71bcdba0 3034 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 3035 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
3036 return;
3037 }
3038
6e157b6a 3039 on_link = 0;
71bcdba0 3040 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 3041 on_link = 1;
71bcdba0 3042 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 3043 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 3044 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
3045 return;
3046 }
3047
3048 in6_dev = __in6_dev_get(skb->dev);
3049 if (!in6_dev)
3050 return;
3051 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3052 return;
3053
3054 /* RFC2461 8.1:
3055 * The IP source address of the Redirect MUST be the same as the current
3056 * first-hop router for the specified ICMP Destination Address.
3057 */
3058
f997c55c 3059 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
3060 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3061 return;
3062 }
6e157b6a
DM
3063
3064 lladdr = NULL;
e8599ff4
DM
3065 if (ndopts.nd_opts_tgt_lladdr) {
3066 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3067 skb->dev);
3068 if (!lladdr) {
3069 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3070 return;
3071 }
3072 }
3073
6e157b6a 3074 rt = (struct rt6_info *) dst;
ec13ad1d 3075 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 3076 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 3077 return;
6e157b6a 3078 }
e8599ff4 3079
6e157b6a
DM
3080 /* Redirect received -> path was valid.
3081 * Look, redirects are sent only in response to data packets,
3082 * so that this nexthop apparently is reachable. --ANK
3083 */
0dec879f 3084 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 3085
71bcdba0 3086 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
3087 if (!neigh)
3088 return;
a6279458 3089
1da177e4
LT
3090 /*
3091 * We have finally decided to accept it.
3092 */
3093
f997c55c 3094 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3095 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3096 NEIGH_UPDATE_F_OVERRIDE|
3097 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3098 NEIGH_UPDATE_F_ISROUTER)),
3099 NDISC_REDIRECT, &ndopts);
1da177e4 3100
83a09abd 3101 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 3102 if (!nrt)
1da177e4
LT
3103 goto out;
3104
3105 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3106 if (on_link)
3107 nrt->rt6i_flags &= ~RTF_GATEWAY;
3108
b91d5329 3109 nrt->rt6i_protocol = RTPROT_REDIRECT;
4e3fd7a0 3110 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3111
2b760fcf
WW
3112 /* No need to remove rt from the exception table if rt is
3113 * a cached route because rt6_insert_exception() will
3114 * takes care of it
3115 */
3116 if (rt6_insert_exception(nrt, rt)) {
3117 dst_release_immediate(&nrt->dst);
3118 goto out;
3119 }
1da177e4 3120
d8d1f30b
CG
3121 netevent.old = &rt->dst;
3122 netevent.new = &nrt->dst;
71bcdba0 3123 netevent.daddr = &msg->dest;
60592833 3124 netevent.neigh = neigh;
8d71740c
TT
3125 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3126
1da177e4 3127out:
e8599ff4 3128 neigh_release(neigh);
6e157b6a
DM
3129}
3130
1da177e4
LT
3131/*
3132 * Misc support functions
3133 */
3134
4b32b5ad
MKL
3135static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
3136{
3a2232e9 3137 BUG_ON(from->from);
4b32b5ad
MKL
3138
3139 rt->rt6i_flags &= ~RTF_EXPIRES;
3140 dst_hold(&from->dst);
3a2232e9 3141 rt->from = from;
4b32b5ad
MKL
3142 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
3143}
3144
83a09abd
MKL
3145static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
3146{
3147 rt->dst.input = ort->dst.input;
3148 rt->dst.output = ort->dst.output;
3149 rt->rt6i_dst = ort->rt6i_dst;
3150 rt->dst.error = ort->dst.error;
3151 rt->rt6i_idev = ort->rt6i_idev;
3152 if (rt->rt6i_idev)
3153 in6_dev_hold(rt->rt6i_idev);
3154 rt->dst.lastuse = jiffies;
3155 rt->rt6i_gateway = ort->rt6i_gateway;
3156 rt->rt6i_flags = ort->rt6i_flags;
3157 rt6_set_from(rt, ort);
3158 rt->rt6i_metric = ort->rt6i_metric;
1da177e4 3159#ifdef CONFIG_IPV6_SUBTREES
83a09abd 3160 rt->rt6i_src = ort->rt6i_src;
1da177e4 3161#endif
83a09abd
MKL
3162 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
3163 rt->rt6i_table = ort->rt6i_table;
61adedf3 3164 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
1da177e4
LT
3165}
3166
70ceb4f5 3167#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 3168static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 3169 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3170 const struct in6_addr *gwaddr,
3171 struct net_device *dev)
70ceb4f5 3172{
830218c1
DA
3173 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3174 int ifindex = dev->ifindex;
70ceb4f5
YH
3175 struct fib6_node *fn;
3176 struct rt6_info *rt = NULL;
c71099ac
TG
3177 struct fib6_table *table;
3178
830218c1 3179 table = fib6_get_table(net, tb_id);
38308473 3180 if (!table)
c71099ac 3181 return NULL;
70ceb4f5 3182
66f5d6ce 3183 rcu_read_lock();
38fbeeee 3184 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3185 if (!fn)
3186 goto out;
3187
66f5d6ce 3188 for_each_fib6_node_rt_rcu(fn) {
d1918542 3189 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
3190 continue;
3191 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
3192 continue;
3193 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
3194 continue;
d3843fe5 3195 ip6_hold_safe(NULL, &rt, false);
70ceb4f5
YH
3196 break;
3197 }
3198out:
66f5d6ce 3199 rcu_read_unlock();
70ceb4f5
YH
3200 return rt;
3201}
3202
efa2cea0 3203static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 3204 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3205 const struct in6_addr *gwaddr,
3206 struct net_device *dev,
95c96174 3207 unsigned int pref)
70ceb4f5 3208{
86872cb5 3209 struct fib6_config cfg = {
238fc7ea 3210 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3211 .fc_ifindex = dev->ifindex,
86872cb5
TG
3212 .fc_dst_len = prefixlen,
3213 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3214 RTF_UP | RTF_PREF(pref),
b91d5329 3215 .fc_protocol = RTPROT_RA,
15e47304 3216 .fc_nlinfo.portid = 0,
efa2cea0
DL
3217 .fc_nlinfo.nlh = NULL,
3218 .fc_nlinfo.nl_net = net,
86872cb5
TG
3219 };
3220
830218c1 3221 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3222 cfg.fc_dst = *prefix;
3223 cfg.fc_gateway = *gwaddr;
70ceb4f5 3224
e317da96
YH
3225 /* We should treat it as a default route if prefix length is 0. */
3226 if (!prefixlen)
86872cb5 3227 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3228
333c4301 3229 ip6_route_add(&cfg, NULL);
70ceb4f5 3230
830218c1 3231 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3232}
3233#endif
3234
b71d1d42 3235struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 3236{
830218c1 3237 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
1da177e4 3238 struct rt6_info *rt;
c71099ac 3239 struct fib6_table *table;
1da177e4 3240
830218c1 3241 table = fib6_get_table(dev_net(dev), tb_id);
38308473 3242 if (!table)
c71099ac 3243 return NULL;
1da177e4 3244
66f5d6ce
WW
3245 rcu_read_lock();
3246 for_each_fib6_node_rt_rcu(&table->tb6_root) {
d1918542 3247 if (dev == rt->dst.dev &&
045927ff 3248 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
3249 ipv6_addr_equal(&rt->rt6i_gateway, addr))
3250 break;
3251 }
3252 if (rt)
d3843fe5 3253 ip6_hold_safe(NULL, &rt, false);
66f5d6ce 3254 rcu_read_unlock();
1da177e4
LT
3255 return rt;
3256}
3257
b71d1d42 3258struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
3259 struct net_device *dev,
3260 unsigned int pref)
1da177e4 3261{
86872cb5 3262 struct fib6_config cfg = {
ca254490 3263 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3264 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3265 .fc_ifindex = dev->ifindex,
3266 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3267 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3268 .fc_protocol = RTPROT_RA,
15e47304 3269 .fc_nlinfo.portid = 0,
5578689a 3270 .fc_nlinfo.nlh = NULL,
c346dca1 3271 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 3272 };
1da177e4 3273
4e3fd7a0 3274 cfg.fc_gateway = *gwaddr;
1da177e4 3275
333c4301 3276 if (!ip6_route_add(&cfg, NULL)) {
830218c1
DA
3277 struct fib6_table *table;
3278
3279 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3280 if (table)
3281 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3282 }
1da177e4 3283
1da177e4
LT
3284 return rt6_get_dflt_router(gwaddr, dev);
3285}
3286
830218c1 3287static void __rt6_purge_dflt_routers(struct fib6_table *table)
1da177e4
LT
3288{
3289 struct rt6_info *rt;
3290
3291restart:
66f5d6ce
WW
3292 rcu_read_lock();
3293 for_each_fib6_node_rt_rcu(&table->tb6_root) {
3e8b0ac3
LC
3294 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3295 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d3843fe5 3296 if (dst_hold_safe(&rt->dst)) {
66f5d6ce 3297 rcu_read_unlock();
d3843fe5
WW
3298 ip6_del_rt(rt);
3299 } else {
66f5d6ce 3300 rcu_read_unlock();
d3843fe5 3301 }
1da177e4
LT
3302 goto restart;
3303 }
3304 }
66f5d6ce 3305 rcu_read_unlock();
830218c1
DA
3306
3307 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3308}
3309
3310void rt6_purge_dflt_routers(struct net *net)
3311{
3312 struct fib6_table *table;
3313 struct hlist_head *head;
3314 unsigned int h;
3315
3316 rcu_read_lock();
3317
3318 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3319 head = &net->ipv6.fib_table_hash[h];
3320 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3321 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3322 __rt6_purge_dflt_routers(table);
3323 }
3324 }
3325
3326 rcu_read_unlock();
1da177e4
LT
3327}
3328
5578689a
DL
3329static void rtmsg_to_fib6_config(struct net *net,
3330 struct in6_rtmsg *rtmsg,
86872cb5
TG
3331 struct fib6_config *cfg)
3332{
3333 memset(cfg, 0, sizeof(*cfg));
3334
ca254490
DA
3335 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3336 : RT6_TABLE_MAIN;
86872cb5
TG
3337 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3338 cfg->fc_metric = rtmsg->rtmsg_metric;
3339 cfg->fc_expires = rtmsg->rtmsg_info;
3340 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3341 cfg->fc_src_len = rtmsg->rtmsg_src_len;
3342 cfg->fc_flags = rtmsg->rtmsg_flags;
3343
5578689a 3344 cfg->fc_nlinfo.nl_net = net;
f1243c2d 3345
4e3fd7a0
AD
3346 cfg->fc_dst = rtmsg->rtmsg_dst;
3347 cfg->fc_src = rtmsg->rtmsg_src;
3348 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
3349}
3350
5578689a 3351int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3352{
86872cb5 3353 struct fib6_config cfg;
1da177e4
LT
3354 struct in6_rtmsg rtmsg;
3355 int err;
3356
67ba4152 3357 switch (cmd) {
1da177e4
LT
3358 case SIOCADDRT: /* Add a route */
3359 case SIOCDELRT: /* Delete a route */
af31f412 3360 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3361 return -EPERM;
3362 err = copy_from_user(&rtmsg, arg,
3363 sizeof(struct in6_rtmsg));
3364 if (err)
3365 return -EFAULT;
86872cb5 3366
5578689a 3367 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3368
1da177e4
LT
3369 rtnl_lock();
3370 switch (cmd) {
3371 case SIOCADDRT:
333c4301 3372 err = ip6_route_add(&cfg, NULL);
1da177e4
LT
3373 break;
3374 case SIOCDELRT:
333c4301 3375 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3376 break;
3377 default:
3378 err = -EINVAL;
3379 }
3380 rtnl_unlock();
3381
3382 return err;
3ff50b79 3383 }
1da177e4
LT
3384
3385 return -EINVAL;
3386}
3387
3388/*
3389 * Drop the packet on the floor
3390 */
3391
d5fdd6ba 3392static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3393{
612f09e8 3394 int type;
adf30907 3395 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3396 switch (ipstats_mib_noroutes) {
3397 case IPSTATS_MIB_INNOROUTES:
0660e03f 3398 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3399 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
3400 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3401 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3402 break;
3403 }
3404 /* FALLTHROUGH */
3405 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3406 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3407 ipstats_mib_noroutes);
612f09e8
YH
3408 break;
3409 }
3ffe533c 3410 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3411 kfree_skb(skb);
3412 return 0;
3413}
3414
9ce8ade0
TG
3415static int ip6_pkt_discard(struct sk_buff *skb)
3416{
612f09e8 3417 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3418}
3419
ede2059d 3420static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3421{
adf30907 3422 skb->dev = skb_dst(skb)->dev;
612f09e8 3423 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3424}
3425
9ce8ade0
TG
3426static int ip6_pkt_prohibit(struct sk_buff *skb)
3427{
612f09e8 3428 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3429}
3430
ede2059d 3431static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3432{
adf30907 3433 skb->dev = skb_dst(skb)->dev;
612f09e8 3434 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3435}
3436
1da177e4
LT
3437/*
3438 * Allocate a dst for local (unicast / anycast) address.
3439 */
3440
3441struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
3442 const struct in6_addr *addr,
8f031519 3443 bool anycast)
1da177e4 3444{
ca254490 3445 u32 tb_id;
c346dca1 3446 struct net *net = dev_net(idev->dev);
4832c30d 3447 struct net_device *dev = idev->dev;
5f02ce24
DA
3448 struct rt6_info *rt;
3449
5f02ce24 3450 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
a3300ef4 3451 if (!rt)
1da177e4
LT
3452 return ERR_PTR(-ENOMEM);
3453
1da177e4
LT
3454 in6_dev_hold(idev);
3455
11d53b49 3456 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
3457 rt->dst.input = ip6_input;
3458 rt->dst.output = ip6_output;
1da177e4 3459 rt->rt6i_idev = idev;
1da177e4 3460
94b5e0f9 3461 rt->rt6i_protocol = RTPROT_KERNEL;
1da177e4 3462 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
3463 if (anycast)
3464 rt->rt6i_flags |= RTF_ANYCAST;
3465 else
1da177e4 3466 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 3467
550bab42 3468 rt->rt6i_gateway = *addr;
4e3fd7a0 3469 rt->rt6i_dst.addr = *addr;
1da177e4 3470 rt->rt6i_dst.plen = 128;
ca254490
DA
3471 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3472 rt->rt6i_table = fib6_get_table(net, tb_id);
1da177e4 3473
1da177e4
LT
3474 return rt;
3475}
3476
c3968a85
DW
3477/* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any */
	struct net *net;		/* namespace being walked */
	struct in6_addr *addr;		/* address being removed */
};
3483
3484static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
3485{
3486 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3487 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3488 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3489
d1918542 3490 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
3491 rt != net->ipv6.ip6_null_entry &&
3492 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
60006a48 3493 spin_lock_bh(&rt6_exception_lock);
c3968a85
DW
3494 /* remove prefsrc entry */
3495 rt->rt6i_prefsrc.plen = 0;
60006a48
WW
3496 /* need to update cache as well */
3497 rt6_exceptions_remove_prefsrc(rt);
3498 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3499 }
3500 return 0;
3501}
3502
3503void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3504{
3505 struct net *net = dev_net(ifp->idev->dev);
3506 struct arg_dev_net_ip adni = {
3507 .dev = ifp->idev->dev,
3508 .net = net,
3509 .addr = &ifp->addr,
3510 };
0c3584d5 3511 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3512}
3513
be7a010d 3514#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
be7a010d
DJ
3515
3516/* Remove routers and update dst entries when gateway turn into host. */
3517static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
3518{
3519 struct in6_addr *gateway = (struct in6_addr *)arg;
3520
2b760fcf
WW
3521 if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
3522 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
be7a010d
DJ
3523 return -1;
3524 }
b16cb459
WW
3525
3526 /* Further clean up cached routes in exception table.
3527 * This is needed because cached route may have a different
3528 * gateway than its 'parent' in the case of an ip redirect.
3529 */
3530 rt6_exceptions_clean_tohost(rt, gateway);
3531
be7a010d
DJ
3532 return 0;
3533}
3534
3535void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3536{
3537 fib6_clean_all(net, fib6_clean_tohost, gateway);
3538}
3539
2127d95a
IS
/*
 * Argument for the fib6_ifup() / fib6_ifdown() walkers.  The union member
 * in use depends on the walker: fib6_ifup() reads nh_flags, fib6_ifdown()
 * reads event.
 */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event is about */
	union {
		unsigned int nh_flags;	/* nexthop flags to clear (ifup) */
		unsigned long event;	/* NETDEV_* event (ifdown) */
	};
};
3547
d7dedee1
IS
3548static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
3549{
3550 struct rt6_info *iter;
3551 struct fib6_node *fn;
3552
3553 fn = rcu_dereference_protected(rt->rt6i_node,
3554 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3555 iter = rcu_dereference_protected(fn->leaf,
3556 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3557 while (iter) {
3558 if (iter->rt6i_metric == rt->rt6i_metric &&
3559 rt6_qualify_for_ecmp(iter))
3560 return iter;
3561 iter = rcu_dereference_protected(iter->rt6_next,
3562 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3563 }
3564
3565 return NULL;
3566}
3567
3568static bool rt6_is_dead(const struct rt6_info *rt)
3569{
3570 if (rt->rt6i_nh_flags & RTNH_F_DEAD ||
3571 (rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
3572 rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
3573 return true;
3574
3575 return false;
3576}
3577
3578static int rt6_multipath_total_weight(const struct rt6_info *rt)
3579{
3580 struct rt6_info *iter;
3581 int total = 0;
3582
3583 if (!rt6_is_dead(rt))
398958ae 3584 total += rt->rt6i_nh_weight;
d7dedee1
IS
3585
3586 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
3587 if (!rt6_is_dead(iter))
398958ae 3588 total += iter->rt6i_nh_weight;
d7dedee1
IS
3589 }
3590
3591 return total;
3592}
3593
3594static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
3595{
3596 int upper_bound = -1;
3597
3598 if (!rt6_is_dead(rt)) {
398958ae 3599 *weight += rt->rt6i_nh_weight;
d7dedee1
IS
3600 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3601 total) - 1;
3602 }
3603 atomic_set(&rt->rt6i_nh_upper_bound, upper_bound);
3604}
3605
3606static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
3607{
3608 struct rt6_info *iter;
3609 int weight = 0;
3610
3611 rt6_upper_bound_set(rt, &weight, total);
3612
3613 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3614 rt6_upper_bound_set(iter, &weight, total);
3615}
3616
3617void rt6_multipath_rebalance(struct rt6_info *rt)
3618{
3619 struct rt6_info *first;
3620 int total;
3621
3622 /* In case the entire multipath route was marked for flushing,
3623 * then there is no need to rebalance upon the removal of every
3624 * sibling route.
3625 */
3626 if (!rt->rt6i_nsiblings || rt->should_flush)
3627 return;
3628
3629 /* During lookup routes are evaluated in order, so we need to
3630 * make sure upper bounds are assigned from the first sibling
3631 * onwards.
3632 */
3633 first = rt6_multipath_first_sibling(rt);
3634 if (WARN_ON_ONCE(!first))
3635 return;
3636
3637 total = rt6_multipath_total_weight(first);
3638 rt6_multipath_upper_bound_set(first, total);
3639}
3640
2127d95a
IS
3641static int fib6_ifup(struct rt6_info *rt, void *p_arg)
3642{
3643 const struct arg_netdev_event *arg = p_arg;
3644 const struct net *net = dev_net(arg->dev);
3645
1de178ed 3646 if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) {
2127d95a 3647 rt->rt6i_nh_flags &= ~arg->nh_flags;
1de178ed 3648 fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt);
d7dedee1 3649 rt6_multipath_rebalance(rt);
1de178ed 3650 }
2127d95a
IS
3651
3652 return 0;
3653}
3654
3655void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3656{
3657 struct arg_netdev_event arg = {
3658 .dev = dev,
6802f3ad
IS
3659 {
3660 .nh_flags = nh_flags,
3661 },
2127d95a
IS
3662 };
3663
3664 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3665 arg.nh_flags |= RTNH_F_LINKDOWN;
3666
3667 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3668}
3669
1de178ed
IS
3670static bool rt6_multipath_uses_dev(const struct rt6_info *rt,
3671 const struct net_device *dev)
3672{
3673 struct rt6_info *iter;
3674
3675 if (rt->dst.dev == dev)
3676 return true;
3677 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3678 if (iter->dst.dev == dev)
3679 return true;
3680
3681 return false;
3682}
3683
3684static void rt6_multipath_flush(struct rt6_info *rt)
3685{
3686 struct rt6_info *iter;
3687
3688 rt->should_flush = 1;
3689 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3690 iter->should_flush = 1;
3691}
3692
3693static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
3694 const struct net_device *down_dev)
3695{
3696 struct rt6_info *iter;
3697 unsigned int dead = 0;
3698
3699 if (rt->dst.dev == down_dev || rt->rt6i_nh_flags & RTNH_F_DEAD)
3700 dead++;
3701 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3702 if (iter->dst.dev == down_dev ||
3703 iter->rt6i_nh_flags & RTNH_F_DEAD)
3704 dead++;
3705
3706 return dead;
3707}
3708
3709static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
3710 const struct net_device *dev,
3711 unsigned int nh_flags)
3712{
3713 struct rt6_info *iter;
3714
3715 if (rt->dst.dev == dev)
3716 rt->rt6i_nh_flags |= nh_flags;
3717 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3718 if (iter->dst.dev == dev)
3719 iter->rt6i_nh_flags |= nh_flags;
3720}
3721
a1a22c12 3722/* called with write lock held for table with rt */
4c981e28 3723static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
1da177e4 3724{
4c981e28
IS
3725 const struct arg_netdev_event *arg = p_arg;
3726 const struct net_device *dev = arg->dev;
3727 const struct net *net = dev_net(dev);
8ed67789 3728
1de178ed 3729 if (rt == net->ipv6.ip6_null_entry)
27c6fa73
IS
3730 return 0;
3731
3732 switch (arg->event) {
3733 case NETDEV_UNREGISTER:
1de178ed 3734 return rt->dst.dev == dev ? -1 : 0;
27c6fa73 3735 case NETDEV_DOWN:
1de178ed 3736 if (rt->should_flush)
27c6fa73 3737 return -1;
1de178ed
IS
3738 if (!rt->rt6i_nsiblings)
3739 return rt->dst.dev == dev ? -1 : 0;
3740 if (rt6_multipath_uses_dev(rt, dev)) {
3741 unsigned int count;
3742
3743 count = rt6_multipath_dead_count(rt, dev);
3744 if (rt->rt6i_nsiblings + 1 == count) {
3745 rt6_multipath_flush(rt);
3746 return -1;
3747 }
3748 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
3749 RTNH_F_LINKDOWN);
3750 fib6_update_sernum(rt);
d7dedee1 3751 rt6_multipath_rebalance(rt);
1de178ed
IS
3752 }
3753 return -2;
27c6fa73 3754 case NETDEV_CHANGE:
1de178ed
IS
3755 if (rt->dst.dev != dev ||
3756 rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73
IS
3757 break;
3758 rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 3759 rt6_multipath_rebalance(rt);
27c6fa73 3760 break;
2b241361 3761 }
c159d30c 3762
1da177e4
LT
3763 return 0;
3764}
3765
27c6fa73 3766void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 3767{
4c981e28 3768 struct arg_netdev_event arg = {
8ed67789 3769 .dev = dev,
6802f3ad
IS
3770 {
3771 .event = event,
3772 },
8ed67789
DL
3773 };
3774
4c981e28
IS
3775 fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
3776}
3777
3778void rt6_disable_ip(struct net_device *dev, unsigned long event)
3779{
3780 rt6_sync_down_dev(dev, event);
3781 rt6_uncached_list_flush_dev(dev_net(dev), dev);
3782 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
3783}
3784
/* Argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* new MTU value */
};
3789
3790static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
3791{
3792 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
3793 struct inet6_dev *idev;
3794
3795 /* In IPv6 pmtu discovery is not optional,
3796 so that RTAX_MTU lock cannot disable it.
3797 We still use this lock to block changes
3798 caused by addrconf/ndisc.
3799 */
3800
3801 idev = __in6_dev_get(arg->dev);
38308473 3802 if (!idev)
1da177e4
LT
3803 return 0;
3804
3805 /* For administrative MTU increase, there is no way to discover
3806 IPv6 PMTU increase, so PMTU increase should be updated here.
3807 Since RFC 1981 doesn't include administrative MTU increase
3808 update PMTU increase is a MUST. (i.e. jumbo frame)
3809 */
3810 /*
3811 If new MTU is less than route PMTU, this new MTU will be the
3812 lowest MTU in the path, update the route PMTU to reflect PMTU
3813 decreases; if new MTU is greater than route PMTU, and the
3814 old MTU is the lowest MTU in the path, update the route PMTU
3815 to reflect the increase. In this case if the other nodes' MTU
3816 also have the lowest MTU, TOO BIG MESSAGE will be lead to
67c408cf 3817 PMTU discovery.
1da177e4 3818 */
d1918542 3819 if (rt->dst.dev == arg->dev &&
fb56be83 3820 dst_metric_raw(&rt->dst, RTAX_MTU) &&
4b32b5ad 3821 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
f5bbe7ee 3822 spin_lock_bh(&rt6_exception_lock);
2b760fcf
WW
3823 if (dst_mtu(&rt->dst) >= arg->mtu ||
3824 (dst_mtu(&rt->dst) < arg->mtu &&
3825 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
4b32b5ad
MKL
3826 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
3827 }
f5bbe7ee
WW
3828 rt6_exceptions_update_pmtu(rt, arg->mtu);
3829 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 3830 }
1da177e4
LT
3831 return 0;
3832}
3833
95c96174 3834void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 3835{
c71099ac
TG
3836 struct rt6_mtu_change_arg arg = {
3837 .dev = dev,
3838 .mtu = mtu,
3839 };
1da177e4 3840
0c3584d5 3841 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
3842}
3843
ef7c79ed 3844static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 3845 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 3846 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 3847 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
3848 [RTA_PRIORITY] = { .type = NLA_U32 },
3849 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 3850 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 3851 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
3852 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
3853 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 3854 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 3855 [RTA_UID] = { .type = NLA_U32 },
3b45a410 3856 [RTA_MARK] = { .type = NLA_U32 },
86872cb5
TG
3857};
3858
3859static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
3860 struct fib6_config *cfg,
3861 struct netlink_ext_ack *extack)
1da177e4 3862{
86872cb5
TG
3863 struct rtmsg *rtm;
3864 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 3865 unsigned int pref;
86872cb5 3866 int err;
1da177e4 3867
fceb6435
JB
3868 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
3869 NULL);
86872cb5
TG
3870 if (err < 0)
3871 goto errout;
1da177e4 3872
86872cb5
TG
3873 err = -EINVAL;
3874 rtm = nlmsg_data(nlh);
3875 memset(cfg, 0, sizeof(*cfg));
3876
3877 cfg->fc_table = rtm->rtm_table;
3878 cfg->fc_dst_len = rtm->rtm_dst_len;
3879 cfg->fc_src_len = rtm->rtm_src_len;
3880 cfg->fc_flags = RTF_UP;
3881 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 3882 cfg->fc_type = rtm->rtm_type;
86872cb5 3883
ef2c7d7b
ND
3884 if (rtm->rtm_type == RTN_UNREACHABLE ||
3885 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
3886 rtm->rtm_type == RTN_PROHIBIT ||
3887 rtm->rtm_type == RTN_THROW)
86872cb5
TG
3888 cfg->fc_flags |= RTF_REJECT;
3889
ab79ad14
3890 if (rtm->rtm_type == RTN_LOCAL)
3891 cfg->fc_flags |= RTF_LOCAL;
3892
1f56a01f
MKL
3893 if (rtm->rtm_flags & RTM_F_CLONED)
3894 cfg->fc_flags |= RTF_CACHE;
3895
fc1e64e1
DA
3896 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
3897
15e47304 3898 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 3899 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 3900 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
3901
3902 if (tb[RTA_GATEWAY]) {
67b61f6c 3903 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 3904 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 3905 }
86872cb5
TG
3906
3907 if (tb[RTA_DST]) {
3908 int plen = (rtm->rtm_dst_len + 7) >> 3;
3909
3910 if (nla_len(tb[RTA_DST]) < plen)
3911 goto errout;
3912
3913 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 3914 }
86872cb5
TG
3915
3916 if (tb[RTA_SRC]) {
3917 int plen = (rtm->rtm_src_len + 7) >> 3;
3918
3919 if (nla_len(tb[RTA_SRC]) < plen)
3920 goto errout;
3921
3922 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 3923 }
86872cb5 3924
c3968a85 3925 if (tb[RTA_PREFSRC])
67b61f6c 3926 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 3927
86872cb5
TG
3928 if (tb[RTA_OIF])
3929 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
3930
3931 if (tb[RTA_PRIORITY])
3932 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
3933
3934 if (tb[RTA_METRICS]) {
3935 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
3936 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 3937 }
86872cb5
TG
3938
3939 if (tb[RTA_TABLE])
3940 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
3941
51ebd318
ND
3942 if (tb[RTA_MULTIPATH]) {
3943 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
3944 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
3945
3946 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 3947 cfg->fc_mp_len, extack);
9ed59592
DA
3948 if (err < 0)
3949 goto errout;
51ebd318
ND
3950 }
3951
c78ba6d6
LR
3952 if (tb[RTA_PREF]) {
3953 pref = nla_get_u8(tb[RTA_PREF]);
3954 if (pref != ICMPV6_ROUTER_PREF_LOW &&
3955 pref != ICMPV6_ROUTER_PREF_HIGH)
3956 pref = ICMPV6_ROUTER_PREF_MEDIUM;
3957 cfg->fc_flags |= RTF_PREF(pref);
3958 }
3959
19e42e45
RP
3960 if (tb[RTA_ENCAP])
3961 cfg->fc_encap = tb[RTA_ENCAP];
3962
9ed59592 3963 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
3964 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
3965
c255bd68 3966 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
3967 if (err < 0)
3968 goto errout;
3969 }
3970
32bc201e
XL
3971 if (tb[RTA_EXPIRES]) {
3972 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
3973
3974 if (addrconf_finite_timeout(timeout)) {
3975 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
3976 cfg->fc_flags |= RTF_EXPIRES;
3977 }
3978 }
3979
86872cb5
TG
3980 err = 0;
3981errout:
3982 return err;
1da177e4
LT
3983}
3984
6b9ea5a6
RP
3985struct rt6_nh {
3986 struct rt6_info *rt6_info;
3987 struct fib6_config r_cfg;
3988 struct mx6_config mxc;
3989 struct list_head next;
3990};
3991
3992static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
3993{
3994 struct rt6_nh *nh;
3995
3996 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 3997 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
3998 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
3999 nh->r_cfg.fc_ifindex);
4000 }
4001}
4002
4003static int ip6_route_info_append(struct list_head *rt6_nh_list,
4004 struct rt6_info *rt, struct fib6_config *r_cfg)
4005{
4006 struct rt6_nh *nh;
6b9ea5a6
RP
4007 int err = -EEXIST;
4008
4009 list_for_each_entry(nh, rt6_nh_list, next) {
4010 /* check if rt6_info already exists */
f06b7549 4011 if (rt6_duplicate_nexthop(nh->rt6_info, rt))
6b9ea5a6
RP
4012 return err;
4013 }
4014
4015 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4016 if (!nh)
4017 return -ENOMEM;
4018 nh->rt6_info = rt;
4019 err = ip6_convert_metrics(&nh->mxc, r_cfg);
4020 if (err) {
4021 kfree(nh);
4022 return err;
4023 }
4024 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4025 list_add_tail(&nh->next, rt6_nh_list);
4026
4027 return 0;
4028}
4029
3b1137fe
DA
4030static void ip6_route_mpath_notify(struct rt6_info *rt,
4031 struct rt6_info *rt_last,
4032 struct nl_info *info,
4033 __u16 nlflags)
4034{
4035 /* if this is an APPEND route, then rt points to the first route
4036 * inserted and rt_last points to last route inserted. Userspace
4037 * wants a consistent dump of the route which starts at the first
4038 * nexthop. Since sibling routes are always added at the end of
4039 * the list, find the first sibling of the last route appended
4040 */
4041 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
4042 rt = list_first_entry(&rt_last->rt6i_siblings,
4043 struct rt6_info,
4044 rt6i_siblings);
4045 }
4046
4047 if (rt)
4048 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4049}
4050
333c4301
DA
4051static int ip6_route_multipath_add(struct fib6_config *cfg,
4052 struct netlink_ext_ack *extack)
51ebd318 4053{
3b1137fe
DA
4054 struct rt6_info *rt_notif = NULL, *rt_last = NULL;
4055 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4056 struct fib6_config r_cfg;
4057 struct rtnexthop *rtnh;
6b9ea5a6
RP
4058 struct rt6_info *rt;
4059 struct rt6_nh *err_nh;
4060 struct rt6_nh *nh, *nh_safe;
3b1137fe 4061 __u16 nlflags;
51ebd318
ND
4062 int remaining;
4063 int attrlen;
6b9ea5a6
RP
4064 int err = 1;
4065 int nhn = 0;
4066 int replace = (cfg->fc_nlinfo.nlh &&
4067 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4068 LIST_HEAD(rt6_nh_list);
51ebd318 4069
3b1137fe
DA
4070 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4071 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4072 nlflags |= NLM_F_APPEND;
4073
35f1b4e9 4074 remaining = cfg->fc_mp_len;
51ebd318 4075 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4076
6b9ea5a6
RP
4077 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
4078 * rt6_info structs per nexthop
4079 */
51ebd318
ND
4080 while (rtnh_ok(rtnh, remaining)) {
4081 memcpy(&r_cfg, cfg, sizeof(*cfg));
4082 if (rtnh->rtnh_ifindex)
4083 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4084
4085 attrlen = rtnh_attrlen(rtnh);
4086 if (attrlen > 0) {
4087 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4088
4089 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4090 if (nla) {
67b61f6c 4091 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4092 r_cfg.fc_flags |= RTF_GATEWAY;
4093 }
19e42e45
RP
4094 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4095 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4096 if (nla)
4097 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4098 }
6b9ea5a6 4099
333c4301 4100 rt = ip6_route_info_create(&r_cfg, extack);
8c5b83f0
RP
4101 if (IS_ERR(rt)) {
4102 err = PTR_ERR(rt);
4103 rt = NULL;
6b9ea5a6 4104 goto cleanup;
8c5b83f0 4105 }
6b9ea5a6 4106
398958ae
IS
4107 rt->rt6i_nh_weight = rtnh->rtnh_hops + 1;
4108
6b9ea5a6 4109 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
51ebd318 4110 if (err) {
587fea74 4111 dst_release_immediate(&rt->dst);
6b9ea5a6
RP
4112 goto cleanup;
4113 }
4114
4115 rtnh = rtnh_next(rtnh, &remaining);
4116 }
4117
3b1137fe
DA
4118 /* for add and replace send one notification with all nexthops.
4119 * Skip the notification in fib6_add_rt2node and send one with
4120 * the full route when done
4121 */
4122 info->skip_notify = 1;
4123
6b9ea5a6
RP
4124 err_nh = NULL;
4125 list_for_each_entry(nh, &rt6_nh_list, next) {
3b1137fe 4126 rt_last = nh->rt6_info;
333c4301 4127 err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
3b1137fe
DA
4128 /* save reference to first route for notification */
4129 if (!rt_notif && !err)
4130 rt_notif = nh->rt6_info;
4131
6b9ea5a6
RP
4132 /* nh->rt6_info is used or freed at this point, reset to NULL*/
4133 nh->rt6_info = NULL;
4134 if (err) {
4135 if (replace && nhn)
4136 ip6_print_replace_route_err(&rt6_nh_list);
4137 err_nh = nh;
4138 goto add_errout;
51ebd318 4139 }
6b9ea5a6 4140
1a72418b 4141 /* Because each route is added like a single route we remove
27596472
MK
4142 * these flags after the first nexthop: if there is a collision,
4143 * we have already failed to add the first nexthop:
4144 * fib6_add_rt2node() has rejected it; when replacing, old
4145 * nexthops have been replaced by first new, the rest should
4146 * be added to it.
1a72418b 4147 */
27596472
MK
4148 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4149 NLM_F_REPLACE);
6b9ea5a6
RP
4150 nhn++;
4151 }
4152
3b1137fe
DA
4153 /* success ... tell user about new route */
4154 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4155 goto cleanup;
4156
4157add_errout:
3b1137fe
DA
4158 /* send notification for routes that were added so that
4159 * the delete notifications sent by ip6_route_del are
4160 * coherent
4161 */
4162 if (rt_notif)
4163 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4164
6b9ea5a6
RP
4165 /* Delete routes that were already added */
4166 list_for_each_entry(nh, &rt6_nh_list, next) {
4167 if (err_nh == nh)
4168 break;
333c4301 4169 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4170 }
4171
4172cleanup:
4173 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
587fea74
WW
4174 if (nh->rt6_info)
4175 dst_release_immediate(&nh->rt6_info->dst);
52fe51f8 4176 kfree(nh->mxc.mx);
6b9ea5a6
RP
4177 list_del(&nh->next);
4178 kfree(nh);
4179 }
4180
4181 return err;
4182}
4183
333c4301
DA
4184static int ip6_route_multipath_del(struct fib6_config *cfg,
4185 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4186{
4187 struct fib6_config r_cfg;
4188 struct rtnexthop *rtnh;
4189 int remaining;
4190 int attrlen;
4191 int err = 1, last_err = 0;
4192
4193 remaining = cfg->fc_mp_len;
4194 rtnh = (struct rtnexthop *)cfg->fc_mp;
4195
4196 /* Parse a Multipath Entry */
4197 while (rtnh_ok(rtnh, remaining)) {
4198 memcpy(&r_cfg, cfg, sizeof(*cfg));
4199 if (rtnh->rtnh_ifindex)
4200 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4201
4202 attrlen = rtnh_attrlen(rtnh);
4203 if (attrlen > 0) {
4204 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4205
4206 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4207 if (nla) {
4208 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4209 r_cfg.fc_flags |= RTF_GATEWAY;
4210 }
4211 }
333c4301 4212 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4213 if (err)
4214 last_err = err;
4215
51ebd318
ND
4216 rtnh = rtnh_next(rtnh, &remaining);
4217 }
4218
4219 return last_err;
4220}
4221
c21ef3e3
DA
4222static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4223 struct netlink_ext_ack *extack)
1da177e4 4224{
86872cb5
TG
4225 struct fib6_config cfg;
4226 int err;
1da177e4 4227
333c4301 4228 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4229 if (err < 0)
4230 return err;
4231
51ebd318 4232 if (cfg.fc_mp)
333c4301 4233 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4234 else {
4235 cfg.fc_delete_all_nh = 1;
333c4301 4236 return ip6_route_del(&cfg, extack);
0ae81335 4237 }
1da177e4
LT
4238}
4239
c21ef3e3
DA
4240static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4241 struct netlink_ext_ack *extack)
1da177e4 4242{
86872cb5
TG
4243 struct fib6_config cfg;
4244 int err;
1da177e4 4245
333c4301 4246 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4247 if (err < 0)
4248 return err;
4249
51ebd318 4250 if (cfg.fc_mp)
333c4301 4251 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4252 else
333c4301 4253 return ip6_route_add(&cfg, extack);
1da177e4
LT
4254}
4255
beb1afac 4256static size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f 4257{
beb1afac
DA
4258 int nexthop_len = 0;
4259
4260 if (rt->rt6i_nsiblings) {
4261 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4262 + NLA_ALIGN(sizeof(struct rtnexthop))
4263 + nla_total_size(16) /* RTA_GATEWAY */
beb1afac
DA
4264 + lwtunnel_get_encap_size(rt->dst.lwtstate);
4265
4266 nexthop_len *= rt->rt6i_nsiblings;
4267 }
4268
339bf98f
TG
4269 return NLMSG_ALIGN(sizeof(struct rtmsg))
4270 + nla_total_size(16) /* RTA_SRC */
4271 + nla_total_size(16) /* RTA_DST */
4272 + nla_total_size(16) /* RTA_GATEWAY */
4273 + nla_total_size(16) /* RTA_PREFSRC */
4274 + nla_total_size(4) /* RTA_TABLE */
4275 + nla_total_size(4) /* RTA_IIF */
4276 + nla_total_size(4) /* RTA_OIF */
4277 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4278 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4279 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4280 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4281 + nla_total_size(1) /* RTA_PREF */
beb1afac
DA
4282 + lwtunnel_get_encap_size(rt->dst.lwtstate)
4283 + nexthop_len;
4284}
4285
4286static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
5be083ce 4287 unsigned int *flags, bool skip_oif)
beb1afac 4288{
f9d882ea
IS
4289 if (rt->rt6i_nh_flags & RTNH_F_DEAD)
4290 *flags |= RTNH_F_DEAD;
4291
44c9f2f2 4292 if (rt->rt6i_nh_flags & RTNH_F_LINKDOWN) {
beb1afac
DA
4293 *flags |= RTNH_F_LINKDOWN;
4294 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
4295 *flags |= RTNH_F_DEAD;
4296 }
4297
4298 if (rt->rt6i_flags & RTF_GATEWAY) {
4299 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
4300 goto nla_put_failure;
4301 }
4302
fc1e64e1 4303 *flags |= (rt->rt6i_nh_flags & RTNH_F_ONLINK);
fe400799 4304 if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
4305 *flags |= RTNH_F_OFFLOAD;
4306
5be083ce
DA
4307 /* not needed for multipath encoding b/c it has a rtnexthop struct */
4308 if (!skip_oif && rt->dst.dev &&
beb1afac
DA
4309 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
4310 goto nla_put_failure;
4311
4312 if (rt->dst.lwtstate &&
4313 lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
4314 goto nla_put_failure;
4315
4316 return 0;
4317
4318nla_put_failure:
4319 return -EMSGSIZE;
4320}
4321
5be083ce 4322/* add multipath next hop */
beb1afac
DA
4323static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
4324{
4325 struct rtnexthop *rtnh;
4326 unsigned int flags = 0;
4327
4328 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4329 if (!rtnh)
4330 goto nla_put_failure;
4331
398958ae 4332 rtnh->rtnh_hops = rt->rt6i_nh_weight - 1;
beb1afac
DA
4333 rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
4334
5be083ce 4335 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
4336 goto nla_put_failure;
4337
4338 rtnh->rtnh_flags = flags;
4339
4340 /* length of rtnetlink header + attributes */
4341 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4342
4343 return 0;
4344
4345nla_put_failure:
4346 return -EMSGSIZE;
339bf98f
TG
4347}
4348
191cd582
BH
4349static int rt6_fill_node(struct net *net,
4350 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 4351 struct in6_addr *dst, struct in6_addr *src,
15e47304 4352 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4353 unsigned int flags)
1da177e4 4354{
4b32b5ad 4355 u32 metrics[RTAX_MAX];
1da177e4 4356 struct rtmsg *rtm;
2d7202bf 4357 struct nlmsghdr *nlh;
e3703b3d 4358 long expires;
9e762a4a 4359 u32 table;
1da177e4 4360
15e47304 4361 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4362 if (!nlh)
26932566 4363 return -EMSGSIZE;
2d7202bf
TG
4364
4365 rtm = nlmsg_data(nlh);
1da177e4
LT
4366 rtm->rtm_family = AF_INET6;
4367 rtm->rtm_dst_len = rt->rt6i_dst.plen;
4368 rtm->rtm_src_len = rt->rt6i_src.plen;
4369 rtm->rtm_tos = 0;
c71099ac 4370 if (rt->rt6i_table)
9e762a4a 4371 table = rt->rt6i_table->tb6_id;
c71099ac 4372 else
9e762a4a
PM
4373 table = RT6_TABLE_UNSPEC;
4374 rtm->rtm_table = table;
c78679e8
DM
4375 if (nla_put_u32(skb, RTA_TABLE, table))
4376 goto nla_put_failure;
ef2c7d7b
ND
4377 if (rt->rt6i_flags & RTF_REJECT) {
4378 switch (rt->dst.error) {
4379 case -EINVAL:
4380 rtm->rtm_type = RTN_BLACKHOLE;
4381 break;
4382 case -EACCES:
4383 rtm->rtm_type = RTN_PROHIBIT;
4384 break;
b4949ab2
ND
4385 case -EAGAIN:
4386 rtm->rtm_type = RTN_THROW;
4387 break;
ef2c7d7b
ND
4388 default:
4389 rtm->rtm_type = RTN_UNREACHABLE;
4390 break;
4391 }
4392 }
38308473 4393 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 4394 rtm->rtm_type = RTN_LOCAL;
4ee39733
DA
4395 else if (rt->rt6i_flags & RTF_ANYCAST)
4396 rtm->rtm_type = RTN_ANYCAST;
d1918542 4397 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
4398 rtm->rtm_type = RTN_LOCAL;
4399 else
4400 rtm->rtm_type = RTN_UNICAST;
4401 rtm->rtm_flags = 0;
4402 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4403 rtm->rtm_protocol = rt->rt6i_protocol;
1da177e4 4404
38308473 4405 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
4406 rtm->rtm_flags |= RTM_F_CLONED;
4407
4408 if (dst) {
930345ea 4409 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 4410 goto nla_put_failure;
1ab1457c 4411 rtm->rtm_dst_len = 128;
1da177e4 4412 } else if (rtm->rtm_dst_len)
930345ea 4413 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 4414 goto nla_put_failure;
1da177e4
LT
4415#ifdef CONFIG_IPV6_SUBTREES
4416 if (src) {
930345ea 4417 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4418 goto nla_put_failure;
1ab1457c 4419 rtm->rtm_src_len = 128;
c78679e8 4420 } else if (rtm->rtm_src_len &&
930345ea 4421 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 4422 goto nla_put_failure;
1da177e4 4423#endif
7bc570c8
YH
4424 if (iif) {
4425#ifdef CONFIG_IPV6_MROUTE
4426 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
fd61c6ba
DA
4427 int err = ip6mr_get_route(net, skb, rtm, portid);
4428
4429 if (err == 0)
4430 return 0;
4431 if (err < 0)
4432 goto nla_put_failure;
7bc570c8
YH
4433 } else
4434#endif
c78679e8
DM
4435 if (nla_put_u32(skb, RTA_IIF, iif))
4436 goto nla_put_failure;
7bc570c8 4437 } else if (dst) {
1da177e4 4438 struct in6_addr saddr_buf;
c78679e8 4439 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 4440 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4441 goto nla_put_failure;
1da177e4 4442 }
2d7202bf 4443
c3968a85
DW
4444 if (rt->rt6i_prefsrc.plen) {
4445 struct in6_addr saddr_buf;
4e3fd7a0 4446 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 4447 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4448 goto nla_put_failure;
c3968a85
DW
4449 }
4450
4b32b5ad
MKL
4451 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
4452 if (rt->rt6i_pmtu)
4453 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
4454 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
4455 goto nla_put_failure;
4456
c78679e8
DM
4457 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
4458 goto nla_put_failure;
8253947e 4459
beb1afac
DA
4460 /* For multipath routes, walk the siblings list and add
4461 * each as a nexthop within RTA_MULTIPATH.
4462 */
4463 if (rt->rt6i_nsiblings) {
4464 struct rt6_info *sibling, *next_sibling;
4465 struct nlattr *mp;
4466
4467 mp = nla_nest_start(skb, RTA_MULTIPATH);
4468 if (!mp)
4469 goto nla_put_failure;
4470
4471 if (rt6_add_nexthop(skb, rt) < 0)
4472 goto nla_put_failure;
4473
4474 list_for_each_entry_safe(sibling, next_sibling,
4475 &rt->rt6i_siblings, rt6i_siblings) {
4476 if (rt6_add_nexthop(skb, sibling) < 0)
4477 goto nla_put_failure;
4478 }
4479
4480 nla_nest_end(skb, mp);
4481 } else {
5be083ce 4482 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
4483 goto nla_put_failure;
4484 }
4485
8253947e 4486 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 4487
87a50699 4488 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 4489 goto nla_put_failure;
2d7202bf 4490
c78ba6d6
LR
4491 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
4492 goto nla_put_failure;
4493
19e42e45 4494
053c095a
JB
4495 nlmsg_end(skb, nlh);
4496 return 0;
2d7202bf
TG
4497
4498nla_put_failure:
26932566
PM
4499 nlmsg_cancel(skb, nlh);
4500 return -EMSGSIZE;
1da177e4
LT
4501}
4502
1b43af54 4503int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
4504{
4505 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1f17e2f2
DA
4506 struct net *net = arg->net;
4507
4508 if (rt == net->ipv6.ip6_null_entry)
4509 return 0;
1da177e4 4510
2d7202bf
TG
4511 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4512 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
f8cfe2ce
DA
4513
4514 /* user wants prefix routes only */
4515 if (rtm->rtm_flags & RTM_F_PREFIX &&
4516 !(rt->rt6i_flags & RTF_PREFIX_RT)) {
4517 /* success since this is not a prefix route */
4518 return 1;
4519 }
4520 }
1da177e4 4521
1f17e2f2 4522 return rt6_fill_node(net,
191cd582 4523 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 4524 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
f8cfe2ce 4525 NLM_F_MULTI);
1da177e4
LT
4526}
4527
c21ef3e3
DA
4528static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4529 struct netlink_ext_ack *extack)
1da177e4 4530{
3b1e0a65 4531 struct net *net = sock_net(in_skb->sk);
ab364a6f 4532 struct nlattr *tb[RTA_MAX+1];
18c3a61c
RP
4533 int err, iif = 0, oif = 0;
4534 struct dst_entry *dst;
ab364a6f 4535 struct rt6_info *rt;
1da177e4 4536 struct sk_buff *skb;
ab364a6f 4537 struct rtmsg *rtm;
4c9483b2 4538 struct flowi6 fl6;
18c3a61c 4539 bool fibmatch;
1da177e4 4540
fceb6435 4541 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 4542 extack);
ab364a6f
TG
4543 if (err < 0)
4544 goto errout;
1da177e4 4545
ab364a6f 4546 err = -EINVAL;
4c9483b2 4547 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
4548 rtm = nlmsg_data(nlh);
4549 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4550 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4551
ab364a6f
TG
4552 if (tb[RTA_SRC]) {
4553 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4554 goto errout;
4555
4e3fd7a0 4556 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4557 }
4558
4559 if (tb[RTA_DST]) {
4560 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4561 goto errout;
4562
4e3fd7a0 4563 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4564 }
4565
4566 if (tb[RTA_IIF])
4567 iif = nla_get_u32(tb[RTA_IIF]);
4568
4569 if (tb[RTA_OIF])
72331bc0 4570 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4571
2e47b291
LC
4572 if (tb[RTA_MARK])
4573 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4574
622ec2c9
LC
4575 if (tb[RTA_UID])
4576 fl6.flowi6_uid = make_kuid(current_user_ns(),
4577 nla_get_u32(tb[RTA_UID]));
4578 else
4579 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4580
1da177e4
LT
4581 if (iif) {
4582 struct net_device *dev;
72331bc0
SL
4583 int flags = 0;
4584
121622db
FW
4585 rcu_read_lock();
4586
4587 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4588 if (!dev) {
121622db 4589 rcu_read_unlock();
1da177e4 4590 err = -ENODEV;
ab364a6f 4591 goto errout;
1da177e4 4592 }
72331bc0
SL
4593
4594 fl6.flowi6_iif = iif;
4595
4596 if (!ipv6_addr_any(&fl6.saddr))
4597 flags |= RT6_LOOKUP_F_HAS_SADDR;
4598
58acfd71 4599 dst = ip6_route_input_lookup(net, dev, &fl6, flags);
121622db
FW
4600
4601 rcu_read_unlock();
72331bc0
SL
4602 } else {
4603 fl6.flowi6_oif = oif;
4604
58acfd71 4605 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
4606 }
4607
18c3a61c
RP
4608
4609 rt = container_of(dst, struct rt6_info, dst);
4610 if (rt->dst.error) {
4611 err = rt->dst.error;
4612 ip6_rt_put(rt);
4613 goto errout;
1da177e4
LT
4614 }
4615
9d6acb3b
WC
4616 if (rt == net->ipv6.ip6_null_entry) {
4617 err = rt->dst.error;
4618 ip6_rt_put(rt);
4619 goto errout;
4620 }
4621
fba961ab
DM
4622 if (fibmatch && rt->from) {
4623 struct rt6_info *ort = rt->from;
58acfd71
IS
4624
4625 dst_hold(&ort->dst);
4626 ip6_rt_put(rt);
4627 rt = ort;
4628 }
4629
ab364a6f 4630 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4631 if (!skb) {
94e187c0 4632 ip6_rt_put(rt);
ab364a6f
TG
4633 err = -ENOBUFS;
4634 goto errout;
4635 }
1da177e4 4636
d8d1f30b 4637 skb_dst_set(skb, &rt->dst);
18c3a61c
RP
4638 if (fibmatch)
4639 err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
4640 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4641 nlh->nlmsg_seq, 0);
4642 else
4643 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
4644 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4645 nlh->nlmsg_seq, 0);
1da177e4 4646 if (err < 0) {
ab364a6f
TG
4647 kfree_skb(skb);
4648 goto errout;
1da177e4
LT
4649 }
4650
15e47304 4651 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 4652errout:
1da177e4 4653 return err;
1da177e4
LT
4654}
4655
37a1d361
RP
4656void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
4657 unsigned int nlm_flags)
1da177e4
LT
4658{
4659 struct sk_buff *skb;
5578689a 4660 struct net *net = info->nl_net;
528c4ceb
DL
4661 u32 seq;
4662 int err;
4663
4664 err = -ENOBUFS;
38308473 4665 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 4666
19e42e45 4667 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 4668 if (!skb)
21713ebc
TG
4669 goto errout;
4670
191cd582 4671 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
f8cfe2ce 4672 event, info->portid, seq, nlm_flags);
26932566
PM
4673 if (err < 0) {
4674 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4675 WARN_ON(err == -EMSGSIZE);
4676 kfree_skb(skb);
4677 goto errout;
4678 }
15e47304 4679 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
4680 info->nlh, gfp_any());
4681 return;
21713ebc
TG
4682errout:
4683 if (err < 0)
5578689a 4684 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
4685}
4686
8ed67789 4687static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 4688 unsigned long event, void *ptr)
8ed67789 4689{
351638e7 4690 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 4691 struct net *net = dev_net(dev);
8ed67789 4692
242d3a49
WC
4693 if (!(dev->flags & IFF_LOOPBACK))
4694 return NOTIFY_OK;
4695
4696 if (event == NETDEV_REGISTER) {
d8d1f30b 4697 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
4698 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
4699#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 4700 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 4701 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 4702 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 4703 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 4704#endif
76da0704
WC
4705 } else if (event == NETDEV_UNREGISTER &&
4706 dev->reg_state != NETREG_UNREGISTERED) {
4707 /* NETDEV_UNREGISTER could be fired for multiple times by
4708 * netdev_wait_allrefs(). Make sure we only call this once.
4709 */
12d94a80 4710 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 4711#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
4712 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
4713 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
4714#endif
4715 }
4716
4717 return NOTIFY_OK;
4718}
4719
1da177e4
LT
4720/*
4721 * /proc
4722 */
4723
4724#ifdef CONFIG_PROC_FS
4725
/* File operations for the "ipv6_route" proc entry (seq_file based). */
static const struct file_operations ipv6_route_proc_fops = {
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
4732
1da177e4
LT
4733static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4734{
69ddb805 4735 struct net *net = (struct net *)seq->private;
1da177e4 4736 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
4737 net->ipv6.rt6_stats->fib_nodes,
4738 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 4739 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
4740 net->ipv6.rt6_stats->fib_rt_entries,
4741 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 4742 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 4743 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
4744
4745 return 0;
4746}
4747
/* open() for the "rt6_stats" proc entry: single-record, per-net seq_file. */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
4752
/* File operations for the "rt6_stats" proc entry. */
static const struct file_operations rt6_stats_seq_fops = {
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
4759#endif /* CONFIG_PROC_FS */
4760
4761#ifdef CONFIG_SYSCTL
4762
1da177e4 4763static
fe2c6338 4764int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
4765 void __user *buffer, size_t *lenp, loff_t *ppos)
4766{
c486da34
LAG
4767 struct net *net;
4768 int delay;
4769 if (!write)
1da177e4 4770 return -EINVAL;
c486da34
LAG
4771
4772 net = (struct net *)ctl->extra1;
4773 delay = net->ipv6.sysctl.flush_delay;
4774 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 4775 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 4776 return 0;
1da177e4
LT
4777}
4778
/*
 * Template for the per-namespace IPv6 route sysctl table.
 *
 * The order of the entries matters: ipv6_route_sysctl_init() patches the
 * .data pointers of its copy by array index (0..9), so an entry must not
 * be inserted or moved without updating that function.
 */
struct ctl_table ipv6_route_table_template[] = {
	{	/* [0] write-only; see ipv6_sysctl_rtcache_flush() */
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{	/* [1] */
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{	/* [2] */
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{	/* [3] */
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{	/* [4] */
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{	/* [5] */
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{	/* [6] */
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{	/* [7] */
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{	/* [8] */
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{	/* [9] same variable as gc_min_interval, ms-based handler */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }	/* terminator */
};
4852
2c8c1e72 4853struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
4854{
4855 struct ctl_table *table;
4856
4857 table = kmemdup(ipv6_route_table_template,
4858 sizeof(ipv6_route_table_template),
4859 GFP_KERNEL);
5ee09105
YH
4860
4861 if (table) {
4862 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 4863 table[0].extra1 = net;
86393e52 4864 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
4865 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
4866 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
4867 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
4868 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
4869 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
4870 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
4871 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 4872 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
4873
4874 /* Don't export sysctls to unprivileged users */
4875 if (net->user_ns != &init_user_ns)
4876 table[0].procname = NULL;
5ee09105
YH
4877 }
4878
760f2d01
DL
4879 return table;
4880}
1da177e4
LT
4881#endif
4882
2c8c1e72 4883static int __net_init ip6_route_net_init(struct net *net)
cdb18761 4884{
633d424b 4885 int ret = -ENOMEM;
8ed67789 4886
86393e52
AD
4887 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
4888 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 4889
fc66f95c
ED
4890 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
4891 goto out_ip6_dst_ops;
4892
8ed67789
DL
4893 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
4894 sizeof(*net->ipv6.ip6_null_entry),
4895 GFP_KERNEL);
4896 if (!net->ipv6.ip6_null_entry)
fc66f95c 4897 goto out_ip6_dst_entries;
d8d1f30b 4898 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4899 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
4900 ip6_template_metrics, true);
8ed67789
DL
4901
4902#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 4903 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
4904 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
4905 sizeof(*net->ipv6.ip6_prohibit_entry),
4906 GFP_KERNEL);
68fffc67
PZ
4907 if (!net->ipv6.ip6_prohibit_entry)
4908 goto out_ip6_null_entry;
d8d1f30b 4909 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4910 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
4911 ip6_template_metrics, true);
8ed67789
DL
4912
4913 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
4914 sizeof(*net->ipv6.ip6_blk_hole_entry),
4915 GFP_KERNEL);
68fffc67
PZ
4916 if (!net->ipv6.ip6_blk_hole_entry)
4917 goto out_ip6_prohibit_entry;
d8d1f30b 4918 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4919 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
4920 ip6_template_metrics, true);
8ed67789
DL
4921#endif
4922
b339a47c
PZ
4923 net->ipv6.sysctl.flush_delay = 0;
4924 net->ipv6.sysctl.ip6_rt_max_size = 4096;
4925 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
4926 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
4927 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
4928 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
4929 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
4930 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
4931
6891a346
BT
4932 net->ipv6.ip6_rt_gc_expire = 30*HZ;
4933
8ed67789
DL
4934 ret = 0;
4935out:
4936 return ret;
f2fc6a54 4937
68fffc67
PZ
4938#ifdef CONFIG_IPV6_MULTIPLE_TABLES
4939out_ip6_prohibit_entry:
4940 kfree(net->ipv6.ip6_prohibit_entry);
4941out_ip6_null_entry:
4942 kfree(net->ipv6.ip6_null_entry);
4943#endif
fc66f95c
ED
4944out_ip6_dst_entries:
4945 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 4946out_ip6_dst_ops:
f2fc6a54 4947 goto out;
cdb18761
DL
4948}
4949
/* Per-namespace teardown: free the special route entries allocated by
 * ip6_route_net_init() and destroy the namespace's dst entry counter.
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
4959
d189634e
TG
/* Create the namespace's "ipv6_route" and "rt6_stats" proc entries when
 * procfs is enabled.  The proc_create() results are not checked; this
 * function always returns 0.
 */
static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
#endif
	return 0;
}
4968
/* Remove the proc entries created by ip6_route_net_init_late(). */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
4976
cdb18761
DL
/* Per-namespace hooks for the core route state (dst ops, special entries). */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
4981
c3426b47
DM
4982static int __net_init ipv6_inetpeer_init(struct net *net)
4983{
4984 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
4985
4986 if (!bp)
4987 return -ENOMEM;
4988 inet_peer_base_init(bp);
4989 net->ipv6.peers = bp;
4990 return 0;
4991}
4992
4993static void __net_exit ipv6_inetpeer_exit(struct net *net)
4994{
4995 struct inet_peer_base *bp = net->ipv6.peers;
4996
4997 net->ipv6.peers = NULL;
56a6b248 4998 inetpeer_invalidate_tree(bp);
c3426b47
DM
4999 kfree(bp);
5000}
5001
/* Per-namespace hooks for the IPv6 inet_peer base. */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};
5006
d189634e
TG
/* Per-namespace hooks that create and remove the route proc entries. */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};
5011
8ed67789
DL
5012static struct notifier_block ip6_route_dev_notifier = {
5013 .notifier_call = ip6_route_dev_notify,
242d3a49 5014 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5015};
5016
2f460933
WC
5017void __init ip6_route_init_special_entries(void)
5018{
5019 /* Registering of the loopback is done before this portion of code,
5020 * the loopback reference in rt6_info will not be taken, do it
5021 * manually for init_net */
5022 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5023 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5024 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5025 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5026 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5027 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5028 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5029 #endif
5030}
5031
433d49c3 5032int __init ip6_route_init(void)
1da177e4 5033{
433d49c3 5034 int ret;
8d0b94af 5035 int cpu;
433d49c3 5036
9a7ec3a9
DL
5037 ret = -ENOMEM;
5038 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5039 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5040 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5041 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5042 goto out;
14e50e57 5043
fc66f95c 5044 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5045 if (ret)
bdb3289f 5046 goto out_kmem_cache;
bdb3289f 5047
c3426b47
DM
5048 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5049 if (ret)
e8803b6c 5050 goto out_dst_entries;
2a0c451a 5051
7e52b33b
DM
5052 ret = register_pernet_subsys(&ip6_route_net_ops);
5053 if (ret)
5054 goto out_register_inetpeer;
c3426b47 5055
5dc121e9
AE
5056 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5057
e8803b6c 5058 ret = fib6_init();
433d49c3 5059 if (ret)
8ed67789 5060 goto out_register_subsys;
433d49c3 5061
433d49c3
DL
5062 ret = xfrm6_init();
5063 if (ret)
e8803b6c 5064 goto out_fib6_init;
c35b7e72 5065
433d49c3
DL
5066 ret = fib6_rules_init();
5067 if (ret)
5068 goto xfrm6_init;
7e5449c2 5069
d189634e
TG
5070 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5071 if (ret)
5072 goto fib6_rules_init;
5073
16feebcf
FW
5074 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5075 inet6_rtm_newroute, NULL, 0);
5076 if (ret < 0)
5077 goto out_register_late_subsys;
5078
5079 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5080 inet6_rtm_delroute, NULL, 0);
5081 if (ret < 0)
5082 goto out_register_late_subsys;
5083
5084 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5085 inet6_rtm_getroute, NULL,
5086 RTNL_FLAG_DOIT_UNLOCKED);
5087 if (ret < 0)
d189634e 5088 goto out_register_late_subsys;
c127ea2c 5089
8ed67789 5090 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5091 if (ret)
d189634e 5092 goto out_register_late_subsys;
8ed67789 5093
8d0b94af
MKL
5094 for_each_possible_cpu(cpu) {
5095 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5096
5097 INIT_LIST_HEAD(&ul->head);
5098 spin_lock_init(&ul->lock);
5099 }
5100
433d49c3
DL
5101out:
5102 return ret;
5103
d189634e 5104out_register_late_subsys:
16feebcf 5105 rtnl_unregister_all(PF_INET6);
d189634e 5106 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5107fib6_rules_init:
433d49c3
DL
5108 fib6_rules_cleanup();
5109xfrm6_init:
433d49c3 5110 xfrm6_fini();
2a0c451a
TG
5111out_fib6_init:
5112 fib6_gc_cleanup();
8ed67789
DL
5113out_register_subsys:
5114 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5115out_register_inetpeer:
5116 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5117out_dst_entries:
5118 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5119out_kmem_cache:
f2fc6a54 5120 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5121 goto out;
1da177e4
LT
5122}
5123
5124void ip6_route_cleanup(void)
5125{
8ed67789 5126 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5127 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5128 fib6_rules_cleanup();
1da177e4 5129 xfrm6_fini();
1da177e4 5130 fib6_gc_cleanup();
c3426b47 5131 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5132 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5133 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5134 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5135}