]> git.ipfire.org Git - thirdparty/kernel/stable.git/blame - net/ipv6/route.c
net: phy: marvell10g: add thermal hwmon device
[thirdparty/kernel/stable.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
b811580d 66#include <trace/events/fib6.h>
1da177e4 67
7c0f6ba6 68#include <linux/uaccess.h>
1da177e4
LT
69
70#ifdef CONFIG_SYSCTL
71#include <linux/sysctl.h>
72#endif
73
/* Result of the neighbour check performed while scoring a route.
 * Every failure code is negative; the single success code is positive,
 * so callers can test "< 0" to detect any failure.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1
};
80
83a09abd 81static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
1da177e4 82static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 83static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 84static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
85static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86static void ip6_dst_destroy(struct dst_entry *);
87static void ip6_dst_ifdown(struct dst_entry *,
88 struct net_device *dev, int how);
569d3645 89static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
90
91static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 92static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 93static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 94static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 95static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
96static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
97 struct sk_buff *skb, u32 mtu);
98static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
99 struct sk_buff *skb);
4b32b5ad 100static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 101static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
16a16cd3
DA
102static size_t rt6_nlmsg_size(struct rt6_info *rt);
103static int rt6_fill_node(struct net *net,
104 struct sk_buff *skb, struct rt6_info *rt,
105 struct in6_addr *dst, struct in6_addr *src,
106 int iif, int type, u32 portid, u32 seq,
107 unsigned int flags);
35732d01
WW
108static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
109 struct in6_addr *daddr,
110 struct in6_addr *saddr);
1da177e4 111
70ceb4f5 112#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 113static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 114 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
115 const struct in6_addr *gwaddr,
116 struct net_device *dev,
95c96174 117 unsigned int pref);
efa2cea0 118static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 119 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
120 const struct in6_addr *gwaddr,
121 struct net_device *dev);
70ceb4f5
YH
122#endif
123
8d0b94af
MKL
124struct uncached_list {
125 spinlock_t lock;
126 struct list_head head;
127};
128
129static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
130
510c321b 131void rt6_uncached_list_add(struct rt6_info *rt)
8d0b94af
MKL
132{
133 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
134
8d0b94af
MKL
135 rt->rt6i_uncached_list = ul;
136
137 spin_lock_bh(&ul->lock);
138 list_add_tail(&rt->rt6i_uncached, &ul->head);
139 spin_unlock_bh(&ul->lock);
140}
141
510c321b 142void rt6_uncached_list_del(struct rt6_info *rt)
8d0b94af
MKL
143{
144 if (!list_empty(&rt->rt6i_uncached)) {
145 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 146 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
147
148 spin_lock_bh(&ul->lock);
149 list_del(&rt->rt6i_uncached);
81eb8447 150 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
151 spin_unlock_bh(&ul->lock);
152 }
153}
154
155static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
156{
157 struct net_device *loopback_dev = net->loopback_dev;
158 int cpu;
159
e332bc67
EB
160 if (dev == loopback_dev)
161 return;
162
8d0b94af
MKL
163 for_each_possible_cpu(cpu) {
164 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
165 struct rt6_info *rt;
166
167 spin_lock_bh(&ul->lock);
168 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
169 struct inet6_dev *rt_idev = rt->rt6i_idev;
170 struct net_device *rt_dev = rt->dst.dev;
171
e332bc67 172 if (rt_idev->dev == dev) {
8d0b94af
MKL
173 rt->rt6i_idev = in6_dev_get(loopback_dev);
174 in6_dev_put(rt_idev);
175 }
176
e332bc67 177 if (rt_dev == dev) {
8d0b94af
MKL
178 rt->dst.dev = loopback_dev;
179 dev_hold(rt->dst.dev);
180 dev_put(rt_dev);
181 }
182 }
183 spin_unlock_bh(&ul->lock);
184 }
185}
186
d52d3997
MKL
187static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
188{
3a2232e9 189 return dst_metrics_write_ptr(&rt->from->dst);
d52d3997
MKL
190}
191
06582540
DM
192static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
193{
4b32b5ad 194 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 195
d52d3997
MKL
196 if (rt->rt6i_flags & RTF_PCPU)
197 return rt6_pcpu_cow_metrics(rt);
198 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
199 return NULL;
200 else
3b471175 201 return dst_cow_metrics_generic(dst, old);
06582540
DM
202}
203
f894cbf8
DM
204static inline const void *choose_neigh_daddr(struct rt6_info *rt,
205 struct sk_buff *skb,
206 const void *daddr)
39232973
DM
207{
208 struct in6_addr *p = &rt->rt6i_gateway;
209
a7563f34 210 if (!ipv6_addr_any(p))
39232973 211 return (const void *) p;
f894cbf8
DM
212 else if (skb)
213 return &ipv6_hdr(skb)->daddr;
39232973
DM
214 return daddr;
215}
216
f894cbf8
DM
217static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
218 struct sk_buff *skb,
219 const void *daddr)
d3aaeb38 220{
39232973
DM
221 struct rt6_info *rt = (struct rt6_info *) dst;
222 struct neighbour *n;
223
f894cbf8 224 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 225 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
226 if (n)
227 return n;
228 return neigh_create(&nd_tbl, daddr, dst->dev);
229}
230
63fca65d
JA
231static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
232{
233 struct net_device *dev = dst->dev;
234 struct rt6_info *rt = (struct rt6_info *)dst;
235
236 daddr = choose_neigh_daddr(rt, NULL, daddr);
237 if (!daddr)
238 return;
239 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
240 return;
241 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
242 return;
243 __ipv6_confirm_neigh(dev, daddr);
244}
245
9a7ec3a9 246static struct dst_ops ip6_dst_ops_template = {
1da177e4 247 .family = AF_INET6,
1da177e4
LT
248 .gc = ip6_dst_gc,
249 .gc_thresh = 1024,
250 .check = ip6_dst_check,
0dbaee3b 251 .default_advmss = ip6_default_advmss,
ebb762f2 252 .mtu = ip6_mtu,
06582540 253 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
254 .destroy = ip6_dst_destroy,
255 .ifdown = ip6_dst_ifdown,
256 .negative_advice = ip6_negative_advice,
257 .link_failure = ip6_link_failure,
258 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 259 .redirect = rt6_do_redirect,
9f8955cc 260 .local_out = __ip6_local_out,
d3aaeb38 261 .neigh_lookup = ip6_neigh_lookup,
63fca65d 262 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
263};
264
ebb762f2 265static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 266{
618f9bc7
SK
267 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
268
269 return mtu ? : dst->dev->mtu;
ec831ea7
RD
270}
271
6700c270
DM
272static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
273 struct sk_buff *skb, u32 mtu)
14e50e57
DM
274{
275}
276
6700c270
DM
/* .redirect handler of the blackhole ops: intentionally a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
281
14e50e57
DM
282static struct dst_ops ip6_dst_blackhole_ops = {
283 .family = AF_INET6,
14e50e57
DM
284 .destroy = ip6_dst_destroy,
285 .check = ip6_dst_check,
ebb762f2 286 .mtu = ip6_blackhole_mtu,
214f45c9 287 .default_advmss = ip6_default_advmss,
14e50e57 288 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 289 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 290 .cow_metrics = dst_cow_metrics_generic,
d3aaeb38 291 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
292};
293
62fa8a84 294static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 295 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
296};
297
fb0af4c7 298static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
299 .dst = {
300 .__refcnt = ATOMIC_INIT(1),
301 .__use = 1,
2c20cbd7 302 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 303 .error = -ENETUNREACH,
d8d1f30b
CG
304 .input = ip6_pkt_discard,
305 .output = ip6_pkt_discard_out,
1da177e4
LT
306 },
307 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 308 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
309 .rt6i_metric = ~(u32) 0,
310 .rt6i_ref = ATOMIC_INIT(1),
311};
312
101367c2
TG
313#ifdef CONFIG_IPV6_MULTIPLE_TABLES
314
fb0af4c7 315static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
316 .dst = {
317 .__refcnt = ATOMIC_INIT(1),
318 .__use = 1,
2c20cbd7 319 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 320 .error = -EACCES,
d8d1f30b
CG
321 .input = ip6_pkt_prohibit,
322 .output = ip6_pkt_prohibit_out,
101367c2
TG
323 },
324 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 325 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
326 .rt6i_metric = ~(u32) 0,
327 .rt6i_ref = ATOMIC_INIT(1),
328};
329
fb0af4c7 330static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
331 .dst = {
332 .__refcnt = ATOMIC_INIT(1),
333 .__use = 1,
2c20cbd7 334 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 335 .error = -EINVAL,
d8d1f30b 336 .input = dst_discard,
ede2059d 337 .output = dst_discard_out,
101367c2
TG
338 },
339 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 340 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
341 .rt6i_metric = ~(u32) 0,
342 .rt6i_ref = ATOMIC_INIT(1),
343};
344
345#endif
346
ebfa45f0
MKL
347static void rt6_info_init(struct rt6_info *rt)
348{
349 struct dst_entry *dst = &rt->dst;
350
351 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
352 INIT_LIST_HEAD(&rt->rt6i_siblings);
353 INIT_LIST_HEAD(&rt->rt6i_uncached);
354}
355
1da177e4 356/* allocate dst with ip6_dst_ops */
d52d3997
MKL
357static struct rt6_info *__ip6_dst_alloc(struct net *net,
358 struct net_device *dev,
ad706862 359 int flags)
1da177e4 360{
97bab73f 361 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 362 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 363
81eb8447 364 if (rt) {
ebfa45f0 365 rt6_info_init(rt);
81eb8447
WW
366 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
367 }
8104891b 368
cf911662 369 return rt;
1da177e4
LT
370}
371
9ab179d8
DA
372struct rt6_info *ip6_dst_alloc(struct net *net,
373 struct net_device *dev,
374 int flags)
d52d3997 375{
ad706862 376 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
377
378 if (rt) {
379 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
bfd8e5a4 380 if (!rt->rt6i_pcpu) {
587fea74 381 dst_release_immediate(&rt->dst);
d52d3997
MKL
382 return NULL;
383 }
384 }
385
386 return rt;
387}
9ab179d8 388EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 389
1da177e4
LT
390static void ip6_dst_destroy(struct dst_entry *dst)
391{
392 struct rt6_info *rt = (struct rt6_info *)dst;
35732d01 393 struct rt6_exception_bucket *bucket;
3a2232e9 394 struct rt6_info *from = rt->from;
8d0b94af 395 struct inet6_dev *idev;
1da177e4 396
4b32b5ad 397 dst_destroy_metrics_generic(dst);
87775312 398 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
399 rt6_uncached_list_del(rt);
400
401 idev = rt->rt6i_idev;
38308473 402 if (idev) {
1da177e4
LT
403 rt->rt6i_idev = NULL;
404 in6_dev_put(idev);
1ab1457c 405 }
35732d01
WW
406 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
407 if (bucket) {
408 rt->rt6i_exception_bucket = NULL;
409 kfree(bucket);
410 }
1716a961 411
3a2232e9
DM
412 rt->from = NULL;
413 dst_release(&from->dst);
b3419363
DM
414}
415
1da177e4
LT
416static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
417 int how)
418{
419 struct rt6_info *rt = (struct rt6_info *)dst;
420 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 421 struct net_device *loopback_dev =
c346dca1 422 dev_net(dev)->loopback_dev;
1da177e4 423
e5645f51
WW
424 if (idev && idev->dev != loopback_dev) {
425 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
426 if (loopback_idev) {
427 rt->rt6i_idev = loopback_idev;
428 in6_dev_put(idev);
97cac082 429 }
1da177e4
LT
430 }
431}
432
5973fb1e
MKL
433static bool __rt6_check_expired(const struct rt6_info *rt)
434{
435 if (rt->rt6i_flags & RTF_EXPIRES)
436 return time_after(jiffies, rt->dst.expires);
437 else
438 return false;
439}
440
a50feda5 441static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 442{
1716a961
G
443 if (rt->rt6i_flags & RTF_EXPIRES) {
444 if (time_after(jiffies, rt->dst.expires))
a50feda5 445 return true;
3a2232e9 446 } else if (rt->from) {
1e2ea8ad 447 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
3a2232e9 448 rt6_check_expired(rt->from);
1716a961 449 }
a50feda5 450 return false;
1da177e4
LT
451}
452
b4bac172
DA
453static struct rt6_info *rt6_multipath_select(const struct net *net,
454 struct rt6_info *match,
52bd4c0c 455 struct flowi6 *fl6, int oif,
b75cc8f9 456 const struct sk_buff *skb,
52bd4c0c 457 int strict)
51ebd318
ND
458{
459 struct rt6_info *sibling, *next_sibling;
51ebd318 460
b673d6cc
JS
461 /* We might have already computed the hash for ICMPv6 errors. In such
462 * case it will always be non-zero. Otherwise now is the time to do it.
463 */
464 if (!fl6->mp_hash)
b4bac172 465 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
b673d6cc 466
3d709f69
IS
467 if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
468 return match;
469
470 list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings,
471 rt6i_siblings) {
472 if (fl6->mp_hash > atomic_read(&sibling->rt6i_nh_upper_bound))
473 continue;
474 if (rt6_score_route(sibling, oif, strict) < 0)
475 break;
476 match = sibling;
477 break;
478 }
479
51ebd318
ND
480 return match;
481}
482
1da177e4 483/*
66f5d6ce 484 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
485 */
486
8ed67789
DL
487static inline struct rt6_info *rt6_device_match(struct net *net,
488 struct rt6_info *rt,
b71d1d42 489 const struct in6_addr *saddr,
1da177e4 490 int oif,
d420895e 491 int flags)
1da177e4
LT
492{
493 struct rt6_info *local = NULL;
494 struct rt6_info *sprt;
495
8067bb8c
IS
496 if (!oif && ipv6_addr_any(saddr) && !(rt->rt6i_nh_flags & RTNH_F_DEAD))
497 return rt;
dd3abc4e 498
071fb37e 499 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
d1918542 500 struct net_device *dev = sprt->dst.dev;
dd3abc4e 501
8067bb8c
IS
502 if (sprt->rt6i_nh_flags & RTNH_F_DEAD)
503 continue;
504
dd3abc4e 505 if (oif) {
1da177e4
LT
506 if (dev->ifindex == oif)
507 return sprt;
508 if (dev->flags & IFF_LOOPBACK) {
38308473 509 if (!sprt->rt6i_idev ||
1da177e4 510 sprt->rt6i_idev->dev->ifindex != oif) {
17fb0b2b 511 if (flags & RT6_LOOKUP_F_IFACE)
1da177e4 512 continue;
17fb0b2b
DA
513 if (local &&
514 local->rt6i_idev->dev->ifindex == oif)
1da177e4
LT
515 continue;
516 }
517 local = sprt;
518 }
dd3abc4e
YH
519 } else {
520 if (ipv6_chk_addr(net, saddr, dev,
521 flags & RT6_LOOKUP_F_IFACE))
522 return sprt;
1da177e4 523 }
dd3abc4e 524 }
1da177e4 525
dd3abc4e 526 if (oif) {
1da177e4
LT
527 if (local)
528 return local;
529
d420895e 530 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 531 return net->ipv6.ip6_null_entry;
1da177e4 532 }
8067bb8c
IS
533
534 return rt->rt6i_nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt;
1da177e4
LT
535}
536
27097255 537#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
538struct __rt6_probe_work {
539 struct work_struct work;
540 struct in6_addr target;
541 struct net_device *dev;
542};
543
544static void rt6_probe_deferred(struct work_struct *w)
545{
546 struct in6_addr mcaddr;
547 struct __rt6_probe_work *work =
548 container_of(w, struct __rt6_probe_work, work);
549
550 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 551 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 552 dev_put(work->dev);
662f5533 553 kfree(work);
c2f17e82
HFS
554}
555
27097255
YH
556static void rt6_probe(struct rt6_info *rt)
557{
990edb42 558 struct __rt6_probe_work *work;
f2c31e32 559 struct neighbour *neigh;
27097255
YH
560 /*
561 * Okay, this does not seem to be appropriate
562 * for now, however, we need to check if it
563 * is really so; aka Router Reachability Probing.
564 *
565 * Router Reachability Probe MUST be rate-limited
566 * to no more than one per minute.
567 */
2152caea 568 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 569 return;
2152caea
YH
570 rcu_read_lock_bh();
571 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
572 if (neigh) {
8d6c31bf
MKL
573 if (neigh->nud_state & NUD_VALID)
574 goto out;
575
990edb42 576 work = NULL;
2152caea 577 write_lock(&neigh->lock);
990edb42
MKL
578 if (!(neigh->nud_state & NUD_VALID) &&
579 time_after(jiffies,
580 neigh->updated +
581 rt->rt6i_idev->cnf.rtr_probe_interval)) {
582 work = kmalloc(sizeof(*work), GFP_ATOMIC);
583 if (work)
584 __neigh_set_probe_once(neigh);
c2f17e82 585 }
2152caea 586 write_unlock(&neigh->lock);
990edb42
MKL
587 } else {
588 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 589 }
990edb42
MKL
590
591 if (work) {
592 INIT_WORK(&work->work, rt6_probe_deferred);
593 work->target = rt->rt6i_gateway;
594 dev_hold(rt->dst.dev);
595 work->dev = rt->dst.dev;
596 schedule_work(&work->work);
597 }
598
8d6c31bf 599out:
2152caea 600 rcu_read_unlock_bh();
27097255
YH
601}
602#else
/* Without CONFIG_IPV6_ROUTER_PREF router probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* nothing to do */
}
606#endif
607
1da177e4 608/*
554cfb7e 609 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 610 */
b6f99a21 611static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 612{
d1918542 613 struct net_device *dev = rt->dst.dev;
161980f4 614 if (!oif || dev->ifindex == oif)
554cfb7e 615 return 2;
161980f4
DM
616 if ((dev->flags & IFF_LOOPBACK) &&
617 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
618 return 1;
619 return 0;
554cfb7e 620}
1da177e4 621
afc154e9 622static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 623{
f2c31e32 624 struct neighbour *neigh;
afc154e9 625 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 626
4d0c5911
YH
627 if (rt->rt6i_flags & RTF_NONEXTHOP ||
628 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 629 return RT6_NUD_SUCCEED;
145a3621
YH
630
631 rcu_read_lock_bh();
632 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
633 if (neigh) {
634 read_lock(&neigh->lock);
554cfb7e 635 if (neigh->nud_state & NUD_VALID)
afc154e9 636 ret = RT6_NUD_SUCCEED;
398bcbeb 637#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 638 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 639 ret = RT6_NUD_SUCCEED;
7e980569
JB
640 else
641 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 642#endif
145a3621 643 read_unlock(&neigh->lock);
afc154e9
HFS
644 } else {
645 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 646 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 647 }
145a3621
YH
648 rcu_read_unlock_bh();
649
a5a81f0b 650 return ret;
1da177e4
LT
651}
652
554cfb7e
YH
653static int rt6_score_route(struct rt6_info *rt, int oif,
654 int strict)
1da177e4 655{
a5a81f0b 656 int m;
1ab1457c 657
4d0c5911 658 m = rt6_check_dev(rt, oif);
77d16f45 659 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 660 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
661#ifdef CONFIG_IPV6_ROUTER_PREF
662 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
663#endif
afc154e9
HFS
664 if (strict & RT6_LOOKUP_F_REACHABLE) {
665 int n = rt6_check_neigh(rt);
666 if (n < 0)
667 return n;
668 }
554cfb7e
YH
669 return m;
670}
671
f11e6659 672static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
673 int *mpri, struct rt6_info *match,
674 bool *do_rr)
554cfb7e 675{
f11e6659 676 int m;
afc154e9 677 bool match_do_rr = false;
35103d11 678 struct inet6_dev *idev = rt->rt6i_idev;
35103d11 679
8067bb8c
IS
680 if (rt->rt6i_nh_flags & RTNH_F_DEAD)
681 goto out;
682
14c5206c
IS
683 if (idev->cnf.ignore_routes_with_linkdown &&
684 rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 685 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 686 goto out;
f11e6659
DM
687
688 if (rt6_check_expired(rt))
689 goto out;
690
691 m = rt6_score_route(rt, oif, strict);
7e980569 692 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
693 match_do_rr = true;
694 m = 0; /* lowest valid score */
7e980569 695 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 696 goto out;
afc154e9
HFS
697 }
698
699 if (strict & RT6_LOOKUP_F_REACHABLE)
700 rt6_probe(rt);
f11e6659 701
7e980569 702 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 703 if (m > *mpri) {
afc154e9 704 *do_rr = match_do_rr;
f11e6659
DM
705 *mpri = m;
706 match = rt;
f11e6659 707 }
f11e6659
DM
708out:
709 return match;
710}
711
712static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
8d1040e8 713 struct rt6_info *leaf,
f11e6659 714 struct rt6_info *rr_head,
afc154e9
HFS
715 u32 metric, int oif, int strict,
716 bool *do_rr)
f11e6659 717{
9fbdcfaf 718 struct rt6_info *rt, *match, *cont;
554cfb7e 719 int mpri = -1;
1da177e4 720
f11e6659 721 match = NULL;
9fbdcfaf 722 cont = NULL;
071fb37e 723 for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
9fbdcfaf
SK
724 if (rt->rt6i_metric != metric) {
725 cont = rt;
726 break;
727 }
728
729 match = find_match(rt, oif, strict, &mpri, match, do_rr);
730 }
731
66f5d6ce 732 for (rt = leaf; rt && rt != rr_head;
071fb37e 733 rt = rcu_dereference(rt->rt6_next)) {
9fbdcfaf
SK
734 if (rt->rt6i_metric != metric) {
735 cont = rt;
736 break;
737 }
738
afc154e9 739 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
740 }
741
742 if (match || !cont)
743 return match;
744
071fb37e 745 for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))
afc154e9 746 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 747
f11e6659
DM
748 return match;
749}
1da177e4 750
8d1040e8
WW
751static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
752 int oif, int strict)
f11e6659 753{
66f5d6ce 754 struct rt6_info *leaf = rcu_dereference(fn->leaf);
f11e6659 755 struct rt6_info *match, *rt0;
afc154e9 756 bool do_rr = false;
17ecf590 757 int key_plen;
1da177e4 758
87b1af8d 759 if (!leaf || leaf == net->ipv6.ip6_null_entry)
8d1040e8
WW
760 return net->ipv6.ip6_null_entry;
761
66f5d6ce 762 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 763 if (!rt0)
66f5d6ce 764 rt0 = leaf;
1da177e4 765
17ecf590
WW
766 /* Double check to make sure fn is not an intermediate node
767 * and fn->leaf does not points to its child's leaf
768 * (This might happen if all routes under fn are deleted from
769 * the tree and fib6_repair_tree() is called on the node.)
770 */
771 key_plen = rt0->rt6i_dst.plen;
772#ifdef CONFIG_IPV6_SUBTREES
773 if (rt0->rt6i_src.plen)
774 key_plen = rt0->rt6i_src.plen;
775#endif
776 if (fn->fn_bit != key_plen)
777 return net->ipv6.ip6_null_entry;
778
8d1040e8 779 match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
afc154e9 780 &do_rr);
1da177e4 781
afc154e9 782 if (do_rr) {
071fb37e 783 struct rt6_info *next = rcu_dereference(rt0->rt6_next);
f11e6659 784
554cfb7e 785 /* no entries matched; do round-robin */
f11e6659 786 if (!next || next->rt6i_metric != rt0->rt6i_metric)
8d1040e8 787 next = leaf;
f11e6659 788
66f5d6ce
WW
789 if (next != rt0) {
790 spin_lock_bh(&leaf->rt6i_table->tb6_lock);
791 /* make sure next is not being deleted from the tree */
792 if (next->rt6i_node)
793 rcu_assign_pointer(fn->rr_ptr, next);
794 spin_unlock_bh(&leaf->rt6i_table->tb6_lock);
795 }
1da177e4 796 }
1da177e4 797
a02cec21 798 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
799}
800
8b9df265
MKL
801static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
802{
803 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
804}
805
70ceb4f5
YH
806#ifdef CONFIG_IPV6_ROUTE_INFO
807int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 808 const struct in6_addr *gwaddr)
70ceb4f5 809{
c346dca1 810 struct net *net = dev_net(dev);
70ceb4f5
YH
811 struct route_info *rinfo = (struct route_info *) opt;
812 struct in6_addr prefix_buf, *prefix;
813 unsigned int pref;
4bed72e4 814 unsigned long lifetime;
70ceb4f5
YH
815 struct rt6_info *rt;
816
817 if (len < sizeof(struct route_info)) {
818 return -EINVAL;
819 }
820
821 /* Sanity check for prefix_len and length */
822 if (rinfo->length > 3) {
823 return -EINVAL;
824 } else if (rinfo->prefix_len > 128) {
825 return -EINVAL;
826 } else if (rinfo->prefix_len > 64) {
827 if (rinfo->length < 2) {
828 return -EINVAL;
829 }
830 } else if (rinfo->prefix_len > 0) {
831 if (rinfo->length < 1) {
832 return -EINVAL;
833 }
834 }
835
836 pref = rinfo->route_pref;
837 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 838 return -EINVAL;
70ceb4f5 839
4bed72e4 840 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
841
842 if (rinfo->length == 3)
843 prefix = (struct in6_addr *)rinfo->prefix;
844 else {
845 /* this function is safe */
846 ipv6_addr_prefix(&prefix_buf,
847 (struct in6_addr *)rinfo->prefix,
848 rinfo->prefix_len);
849 prefix = &prefix_buf;
850 }
851
f104a567
DJ
852 if (rinfo->prefix_len == 0)
853 rt = rt6_get_dflt_router(gwaddr, dev);
854 else
855 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 856 gwaddr, dev);
70ceb4f5
YH
857
858 if (rt && !lifetime) {
e0a1ad73 859 ip6_del_rt(rt);
70ceb4f5
YH
860 rt = NULL;
861 }
862
863 if (!rt && lifetime)
830218c1
DA
864 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
865 dev, pref);
70ceb4f5
YH
866 else if (rt)
867 rt->rt6i_flags = RTF_ROUTEINFO |
868 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
869
870 if (rt) {
1716a961
G
871 if (!addrconf_finite_timeout(lifetime))
872 rt6_clean_expires(rt);
873 else
874 rt6_set_expires(rt, jiffies + HZ * lifetime);
875
94e187c0 876 ip6_rt_put(rt);
70ceb4f5
YH
877 }
878 return 0;
879}
880#endif
881
a3c00e46
MKL
882static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
883 struct in6_addr *saddr)
884{
66f5d6ce 885 struct fib6_node *pn, *sn;
a3c00e46
MKL
886 while (1) {
887 if (fn->fn_flags & RTN_TL_ROOT)
888 return NULL;
66f5d6ce
WW
889 pn = rcu_dereference(fn->parent);
890 sn = FIB6_SUBTREE(pn);
891 if (sn && sn != fn)
892 fn = fib6_lookup(sn, NULL, saddr);
a3c00e46
MKL
893 else
894 fn = pn;
895 if (fn->fn_flags & RTN_RTINFO)
896 return fn;
897 }
898}
c71099ac 899
d3843fe5
WW
900static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
901 bool null_fallback)
902{
903 struct rt6_info *rt = *prt;
904
905 if (dst_hold_safe(&rt->dst))
906 return true;
907 if (null_fallback) {
908 rt = net->ipv6.ip6_null_entry;
909 dst_hold(&rt->dst);
910 } else {
911 rt = NULL;
912 }
913 *prt = rt;
914 return false;
915}
916
8ed67789
DL
917static struct rt6_info *ip6_pol_route_lookup(struct net *net,
918 struct fib6_table *table,
b75cc8f9
DA
919 struct flowi6 *fl6,
920 const struct sk_buff *skb,
921 int flags)
1da177e4 922{
2b760fcf 923 struct rt6_info *rt, *rt_cache;
1da177e4 924 struct fib6_node *fn;
1da177e4 925
b6cdbc85
DA
926 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
927 flags &= ~RT6_LOOKUP_F_IFACE;
928
66f5d6ce 929 rcu_read_lock();
4c9483b2 930 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 931restart:
66f5d6ce
WW
932 rt = rcu_dereference(fn->leaf);
933 if (!rt) {
934 rt = net->ipv6.ip6_null_entry;
935 } else {
936 rt = rt6_device_match(net, rt, &fl6->saddr,
937 fl6->flowi6_oif, flags);
938 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
b4bac172 939 rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif,
b75cc8f9 940 skb, flags);
66f5d6ce 941 }
a3c00e46
MKL
942 if (rt == net->ipv6.ip6_null_entry) {
943 fn = fib6_backtrack(fn, &fl6->saddr);
944 if (fn)
945 goto restart;
946 }
2b760fcf
WW
947 /* Search through exception table */
948 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
949 if (rt_cache)
950 rt = rt_cache;
951
d3843fe5
WW
952 if (ip6_hold_safe(net, &rt, true))
953 dst_use_noref(&rt->dst, jiffies);
954
66f5d6ce 955 rcu_read_unlock();
b811580d 956
b65f164d 957 trace_fib6_table_lookup(net, rt, table, fl6);
b811580d 958
c71099ac
TG
959 return rt;
960
961}
962
67ba4152 963struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 964 const struct sk_buff *skb, int flags)
ea6e574e 965{
b75cc8f9 966 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
ea6e574e
FW
967}
968EXPORT_SYMBOL_GPL(ip6_route_lookup);
969
9acd9f3a 970struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
b75cc8f9
DA
971 const struct in6_addr *saddr, int oif,
972 const struct sk_buff *skb, int strict)
c71099ac 973{
4c9483b2
DM
974 struct flowi6 fl6 = {
975 .flowi6_oif = oif,
976 .daddr = *daddr,
c71099ac
TG
977 };
978 struct dst_entry *dst;
77d16f45 979 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 980
adaa70bb 981 if (saddr) {
4c9483b2 982 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
983 flags |= RT6_LOOKUP_F_HAS_SADDR;
984 }
985
b75cc8f9 986 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
c71099ac
TG
987 if (dst->error == 0)
988 return (struct rt6_info *) dst;
989
990 dst_release(dst);
991
1da177e4
LT
992 return NULL;
993}
7159039a
YH
994EXPORT_SYMBOL(rt6_lookup);
995
c71099ac 996/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
997 * It takes new route entry, the addition fails by any reason the
998 * route is released.
999 * Caller must hold dst before calling it.
1da177e4
LT
1000 */
1001
e5fd387a 1002static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
333c4301
DA
1003 struct mx6_config *mxc,
1004 struct netlink_ext_ack *extack)
1da177e4
LT
1005{
1006 int err;
c71099ac 1007 struct fib6_table *table;
1da177e4 1008
c71099ac 1009 table = rt->rt6i_table;
66f5d6ce 1010 spin_lock_bh(&table->tb6_lock);
333c4301 1011 err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
66f5d6ce 1012 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1013
1014 return err;
1015}
1016
40e22e8f
TG
1017int ip6_ins_rt(struct rt6_info *rt)
1018{
e715b6d3
FW
1019 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
1020 struct mx6_config mxc = { .mx = NULL, };
1021
1cfb71ee
WW
1022 /* Hold dst to account for the reference from the fib6 tree */
1023 dst_hold(&rt->dst);
333c4301 1024 return __ip6_ins_rt(rt, &info, &mxc, NULL);
40e22e8f
TG
1025}
1026
4832c30d
DA
1027/* called with rcu_lock held */
1028static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
1029{
1030 struct net_device *dev = rt->dst.dev;
1031
98d11291 1032 if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
4832c30d
DA
1033 /* for copies of local routes, dst->dev needs to be the
1034 * device if it is a master device, the master device if
1035 * device is enslaved, and the loopback as the default
1036 */
1037 if (netif_is_l3_slave(dev) &&
1038 !rt6_need_strict(&rt->rt6i_dst.addr))
1039 dev = l3mdev_master_dev_rcu(dev);
1040 else if (!netif_is_l3_master(dev))
1041 dev = dev_net(dev)->loopback_dev;
1042 /* last case is netif_is_l3_master(dev) is true in which
1043 * case we want dev returned to be dev
1044 */
1045 }
1046
1047 return dev;
1048}
1049
8b9df265
MKL
1050static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
1051 const struct in6_addr *daddr,
1052 const struct in6_addr *saddr)
1da177e4 1053{
4832c30d 1054 struct net_device *dev;
1da177e4
LT
1055 struct rt6_info *rt;
1056
1057 /*
1058 * Clone the route.
1059 */
1060
d52d3997 1061 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
3a2232e9 1062 ort = ort->from;
1da177e4 1063
4832c30d
DA
1064 rcu_read_lock();
1065 dev = ip6_rt_get_dev_rcu(ort);
1066 rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
1067 rcu_read_unlock();
83a09abd
MKL
1068 if (!rt)
1069 return NULL;
1070
1071 ip6_rt_copy_init(rt, ort);
1072 rt->rt6i_flags |= RTF_CACHE;
1073 rt->rt6i_metric = 0;
1074 rt->dst.flags |= DST_HOST;
1075 rt->rt6i_dst.addr = *daddr;
1076 rt->rt6i_dst.plen = 128;
1da177e4 1077
83a09abd
MKL
1078 if (!rt6_is_gw_or_nonexthop(ort)) {
1079 if (ort->rt6i_dst.plen != 128 &&
1080 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
1081 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1082#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1083 if (rt->rt6i_src.plen && saddr) {
1084 rt->rt6i_src.addr = *saddr;
1085 rt->rt6i_src.plen = 128;
8b9df265 1086 }
83a09abd 1087#endif
95a9a5ba 1088 }
1da177e4 1089
95a9a5ba
YH
1090 return rt;
1091}
1da177e4 1092
d52d3997
MKL
1093static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1094{
4832c30d 1095 struct net_device *dev;
d52d3997
MKL
1096 struct rt6_info *pcpu_rt;
1097
4832c30d
DA
1098 rcu_read_lock();
1099 dev = ip6_rt_get_dev_rcu(rt);
1100 pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
1101 rcu_read_unlock();
d52d3997
MKL
1102 if (!pcpu_rt)
1103 return NULL;
1104 ip6_rt_copy_init(pcpu_rt, rt);
1105 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1106 pcpu_rt->rt6i_flags |= RTF_PCPU;
1107 return pcpu_rt;
1108}
1109
66f5d6ce 1110/* It should be called with rcu_read_lock() acquired */
d52d3997
MKL
1111static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1112{
a73e4195 1113 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1114
1115 p = this_cpu_ptr(rt->rt6i_pcpu);
1116 pcpu_rt = *p;
1117
d3843fe5 1118 if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false))
a73e4195 1119 rt6_dst_from_metrics_check(pcpu_rt);
d3843fe5 1120
a73e4195
MKL
1121 return pcpu_rt;
1122}
1123
1124static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1125{
1126 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1127
1128 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1129 if (!pcpu_rt) {
1130 struct net *net = dev_net(rt->dst.dev);
1131
9c7370a1
MKL
1132 dst_hold(&net->ipv6.ip6_null_entry->dst);
1133 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1134 }
1135
a94b9367
WW
1136 dst_hold(&pcpu_rt->dst);
1137 p = this_cpu_ptr(rt->rt6i_pcpu);
1138 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1139 BUG_ON(prev);
a94b9367 1140
d52d3997
MKL
1141 rt6_dst_from_metrics_check(pcpu_rt);
1142 return pcpu_rt;
1143}
1144
35732d01
WW
1145/* exception hash table implementation
1146 */
1147static DEFINE_SPINLOCK(rt6_exception_lock);
1148
1149/* Remove rt6_ex from hash table and free the memory
1150 * Caller must hold rt6_exception_lock
1151 */
1152static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1153 struct rt6_exception *rt6_ex)
1154{
b2427e67 1155 struct net *net;
81eb8447 1156
35732d01
WW
1157 if (!bucket || !rt6_ex)
1158 return;
b2427e67
CIK
1159
1160 net = dev_net(rt6_ex->rt6i->dst.dev);
35732d01
WW
1161 rt6_ex->rt6i->rt6i_node = NULL;
1162 hlist_del_rcu(&rt6_ex->hlist);
1163 rt6_release(rt6_ex->rt6i);
1164 kfree_rcu(rt6_ex, rcu);
1165 WARN_ON_ONCE(!bucket->depth);
1166 bucket->depth--;
81eb8447 1167 net->ipv6.rt6_stats->fib_rt_cache--;
35732d01
WW
1168}
1169
1170/* Remove oldest rt6_ex in bucket and free the memory
1171 * Caller must hold rt6_exception_lock
1172 */
1173static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1174{
1175 struct rt6_exception *rt6_ex, *oldest = NULL;
1176
1177 if (!bucket)
1178 return;
1179
1180 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1181 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1182 oldest = rt6_ex;
1183 }
1184 rt6_remove_exception(bucket, oldest);
1185}
1186
1187static u32 rt6_exception_hash(const struct in6_addr *dst,
1188 const struct in6_addr *src)
1189{
1190 static u32 seed __read_mostly;
1191 u32 val;
1192
1193 net_get_random_once(&seed, sizeof(seed));
1194 val = jhash(dst, sizeof(*dst), seed);
1195
1196#ifdef CONFIG_IPV6_SUBTREES
1197 if (src)
1198 val = jhash(src, sizeof(*src), val);
1199#endif
1200 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1201}
1202
1203/* Helper function to find the cached rt in the hash table
1204 * and update bucket pointer to point to the bucket for this
1205 * (daddr, saddr) pair
1206 * Caller must hold rt6_exception_lock
1207 */
1208static struct rt6_exception *
1209__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1210 const struct in6_addr *daddr,
1211 const struct in6_addr *saddr)
1212{
1213 struct rt6_exception *rt6_ex;
1214 u32 hval;
1215
1216 if (!(*bucket) || !daddr)
1217 return NULL;
1218
1219 hval = rt6_exception_hash(daddr, saddr);
1220 *bucket += hval;
1221
1222 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1223 struct rt6_info *rt6 = rt6_ex->rt6i;
1224 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1225
1226#ifdef CONFIG_IPV6_SUBTREES
1227 if (matched && saddr)
1228 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1229#endif
1230 if (matched)
1231 return rt6_ex;
1232 }
1233 return NULL;
1234}
1235
1236/* Helper function to find the cached rt in the hash table
1237 * and update bucket pointer to point to the bucket for this
1238 * (daddr, saddr) pair
1239 * Caller must hold rcu_read_lock()
1240 */
1241static struct rt6_exception *
1242__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1243 const struct in6_addr *daddr,
1244 const struct in6_addr *saddr)
1245{
1246 struct rt6_exception *rt6_ex;
1247 u32 hval;
1248
1249 WARN_ON_ONCE(!rcu_read_lock_held());
1250
1251 if (!(*bucket) || !daddr)
1252 return NULL;
1253
1254 hval = rt6_exception_hash(daddr, saddr);
1255 *bucket += hval;
1256
1257 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1258 struct rt6_info *rt6 = rt6_ex->rt6i;
1259 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1260
1261#ifdef CONFIG_IPV6_SUBTREES
1262 if (matched && saddr)
1263 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1264#endif
1265 if (matched)
1266 return rt6_ex;
1267 }
1268 return NULL;
1269}
1270
1271static int rt6_insert_exception(struct rt6_info *nrt,
1272 struct rt6_info *ort)
1273{
81eb8447 1274 struct net *net = dev_net(ort->dst.dev);
35732d01
WW
1275 struct rt6_exception_bucket *bucket;
1276 struct in6_addr *src_key = NULL;
1277 struct rt6_exception *rt6_ex;
1278 int err = 0;
1279
1280 /* ort can't be a cache or pcpu route */
1281 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
3a2232e9 1282 ort = ort->from;
35732d01
WW
1283 WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
1284
1285 spin_lock_bh(&rt6_exception_lock);
1286
1287 if (ort->exception_bucket_flushed) {
1288 err = -EINVAL;
1289 goto out;
1290 }
1291
1292 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1293 lockdep_is_held(&rt6_exception_lock));
1294 if (!bucket) {
1295 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1296 GFP_ATOMIC);
1297 if (!bucket) {
1298 err = -ENOMEM;
1299 goto out;
1300 }
1301 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1302 }
1303
1304#ifdef CONFIG_IPV6_SUBTREES
1305 /* rt6i_src.plen != 0 indicates ort is in subtree
1306 * and exception table is indexed by a hash of
1307 * both rt6i_dst and rt6i_src.
1308 * Otherwise, the exception table is indexed by
1309 * a hash of only rt6i_dst.
1310 */
1311 if (ort->rt6i_src.plen)
1312 src_key = &nrt->rt6i_src.addr;
1313#endif
60006a48
WW
1314
1315 /* Update rt6i_prefsrc as it could be changed
1316 * in rt6_remove_prefsrc()
1317 */
1318 nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
f5bbe7ee
WW
1319 /* rt6_mtu_change() might lower mtu on ort.
1320 * Only insert this exception route if its mtu
1321 * is less than ort's mtu value.
1322 */
1323 if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) {
1324 err = -EINVAL;
1325 goto out;
1326 }
60006a48 1327
35732d01
WW
1328 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1329 src_key);
1330 if (rt6_ex)
1331 rt6_remove_exception(bucket, rt6_ex);
1332
1333 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1334 if (!rt6_ex) {
1335 err = -ENOMEM;
1336 goto out;
1337 }
1338 rt6_ex->rt6i = nrt;
1339 rt6_ex->stamp = jiffies;
1340 atomic_inc(&nrt->rt6i_ref);
1341 nrt->rt6i_node = ort->rt6i_node;
1342 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1343 bucket->depth++;
81eb8447 1344 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1345
1346 if (bucket->depth > FIB6_MAX_DEPTH)
1347 rt6_exception_remove_oldest(bucket);
1348
1349out:
1350 spin_unlock_bh(&rt6_exception_lock);
1351
1352 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1353 if (!err) {
922c2ac8 1354 spin_lock_bh(&ort->rt6i_table->tb6_lock);
35732d01 1355 fib6_update_sernum(ort);
922c2ac8 1356 spin_unlock_bh(&ort->rt6i_table->tb6_lock);
b886d5f2
PA
1357 fib6_force_start_gc(net);
1358 }
35732d01
WW
1359
1360 return err;
1361}
1362
1363void rt6_flush_exceptions(struct rt6_info *rt)
1364{
1365 struct rt6_exception_bucket *bucket;
1366 struct rt6_exception *rt6_ex;
1367 struct hlist_node *tmp;
1368 int i;
1369
1370 spin_lock_bh(&rt6_exception_lock);
1371 /* Prevent rt6_insert_exception() to recreate the bucket list */
1372 rt->exception_bucket_flushed = 1;
1373
1374 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1375 lockdep_is_held(&rt6_exception_lock));
1376 if (!bucket)
1377 goto out;
1378
1379 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1380 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1381 rt6_remove_exception(bucket, rt6_ex);
1382 WARN_ON_ONCE(bucket->depth);
1383 bucket++;
1384 }
1385
1386out:
1387 spin_unlock_bh(&rt6_exception_lock);
1388}
1389
1390/* Find cached rt in the hash table inside passed in rt
1391 * Caller has to hold rcu_read_lock()
1392 */
1393static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
1394 struct in6_addr *daddr,
1395 struct in6_addr *saddr)
1396{
1397 struct rt6_exception_bucket *bucket;
1398 struct in6_addr *src_key = NULL;
1399 struct rt6_exception *rt6_ex;
1400 struct rt6_info *res = NULL;
1401
1402 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1403
1404#ifdef CONFIG_IPV6_SUBTREES
1405 /* rt6i_src.plen != 0 indicates rt is in subtree
1406 * and exception table is indexed by a hash of
1407 * both rt6i_dst and rt6i_src.
1408 * Otherwise, the exception table is indexed by
1409 * a hash of only rt6i_dst.
1410 */
1411 if (rt->rt6i_src.plen)
1412 src_key = saddr;
1413#endif
1414 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1415
1416 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1417 res = rt6_ex->rt6i;
1418
1419 return res;
1420}
1421
1422/* Remove the passed in cached rt from the hash table that contains it */
1423int rt6_remove_exception_rt(struct rt6_info *rt)
1424{
35732d01 1425 struct rt6_exception_bucket *bucket;
3a2232e9 1426 struct rt6_info *from = rt->from;
35732d01
WW
1427 struct in6_addr *src_key = NULL;
1428 struct rt6_exception *rt6_ex;
1429 int err;
1430
1431 if (!from ||
442d713b 1432 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1433 return -EINVAL;
1434
1435 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1436 return -ENOENT;
1437
1438 spin_lock_bh(&rt6_exception_lock);
1439 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1440 lockdep_is_held(&rt6_exception_lock));
1441#ifdef CONFIG_IPV6_SUBTREES
1442 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1443 * and exception table is indexed by a hash of
1444 * both rt6i_dst and rt6i_src.
1445 * Otherwise, the exception table is indexed by
1446 * a hash of only rt6i_dst.
1447 */
1448 if (from->rt6i_src.plen)
1449 src_key = &rt->rt6i_src.addr;
1450#endif
1451 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1452 &rt->rt6i_dst.addr,
1453 src_key);
1454 if (rt6_ex) {
1455 rt6_remove_exception(bucket, rt6_ex);
1456 err = 0;
1457 } else {
1458 err = -ENOENT;
1459 }
1460
1461 spin_unlock_bh(&rt6_exception_lock);
1462 return err;
1463}
1464
1465/* Find rt6_ex which contains the passed in rt cache and
1466 * refresh its stamp
1467 */
1468static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1469{
35732d01 1470 struct rt6_exception_bucket *bucket;
3a2232e9 1471 struct rt6_info *from = rt->from;
35732d01
WW
1472 struct in6_addr *src_key = NULL;
1473 struct rt6_exception *rt6_ex;
1474
1475 if (!from ||
442d713b 1476 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1477 return;
1478
1479 rcu_read_lock();
1480 bucket = rcu_dereference(from->rt6i_exception_bucket);
1481
1482#ifdef CONFIG_IPV6_SUBTREES
1483 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1484 * and exception table is indexed by a hash of
1485 * both rt6i_dst and rt6i_src.
1486 * Otherwise, the exception table is indexed by
1487 * a hash of only rt6i_dst.
1488 */
1489 if (from->rt6i_src.plen)
1490 src_key = &rt->rt6i_src.addr;
1491#endif
1492 rt6_ex = __rt6_find_exception_rcu(&bucket,
1493 &rt->rt6i_dst.addr,
1494 src_key);
1495 if (rt6_ex)
1496 rt6_ex->stamp = jiffies;
1497
1498 rcu_read_unlock();
1499}
1500
60006a48
WW
1501static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
1502{
1503 struct rt6_exception_bucket *bucket;
1504 struct rt6_exception *rt6_ex;
1505 int i;
1506
1507 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1508 lockdep_is_held(&rt6_exception_lock));
1509
1510 if (bucket) {
1511 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1512 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1513 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1514 }
1515 bucket++;
1516 }
1517 }
1518}
1519
e9fa1495
SB
1520static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1521 struct rt6_info *rt, int mtu)
1522{
1523 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1524 * lowest MTU in the path: always allow updating the route PMTU to
1525 * reflect PMTU decreases.
1526 *
1527 * If the new MTU is higher, and the route PMTU is equal to the local
1528 * MTU, this means the old MTU is the lowest in the path, so allow
1529 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1530 * handle this.
1531 */
1532
1533 if (dst_mtu(&rt->dst) >= mtu)
1534 return true;
1535
1536 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1537 return true;
1538
1539 return false;
1540}
1541
1542static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
1543 struct rt6_info *rt, int mtu)
f5bbe7ee
WW
1544{
1545 struct rt6_exception_bucket *bucket;
1546 struct rt6_exception *rt6_ex;
1547 int i;
1548
1549 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1550 lockdep_is_held(&rt6_exception_lock));
1551
e9fa1495
SB
1552 if (!bucket)
1553 return;
1554
1555 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1556 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1557 struct rt6_info *entry = rt6_ex->rt6i;
1558
1559 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
1560 * route), the metrics of its rt->dst.from have already
1561 * been updated.
1562 */
1563 if (entry->rt6i_pmtu &&
1564 rt6_mtu_change_route_allowed(idev, entry, mtu))
1565 entry->rt6i_pmtu = mtu;
f5bbe7ee 1566 }
e9fa1495 1567 bucket++;
f5bbe7ee
WW
1568 }
1569}
1570
b16cb459
WW
1571#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1572
1573static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
1574 struct in6_addr *gateway)
1575{
1576 struct rt6_exception_bucket *bucket;
1577 struct rt6_exception *rt6_ex;
1578 struct hlist_node *tmp;
1579 int i;
1580
1581 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1582 return;
1583
1584 spin_lock_bh(&rt6_exception_lock);
1585 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1586 lockdep_is_held(&rt6_exception_lock));
1587
1588 if (bucket) {
1589 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1590 hlist_for_each_entry_safe(rt6_ex, tmp,
1591 &bucket->chain, hlist) {
1592 struct rt6_info *entry = rt6_ex->rt6i;
1593
1594 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1595 RTF_CACHE_GATEWAY &&
1596 ipv6_addr_equal(gateway,
1597 &entry->rt6i_gateway)) {
1598 rt6_remove_exception(bucket, rt6_ex);
1599 }
1600 }
1601 bucket++;
1602 }
1603 }
1604
1605 spin_unlock_bh(&rt6_exception_lock);
1606}
1607
c757faa8
WW
1608static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1609 struct rt6_exception *rt6_ex,
1610 struct fib6_gc_args *gc_args,
1611 unsigned long now)
1612{
1613 struct rt6_info *rt = rt6_ex->rt6i;
1614
1859bac0
PA
1615 /* we are pruning and obsoleting aged-out and non gateway exceptions
1616 * even if others have still references to them, so that on next
1617 * dst_check() such references can be dropped.
1618 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1619 * expired, independently from their aging, as per RFC 8201 section 4
1620 */
31afeb42
WW
1621 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1622 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1623 RT6_TRACE("aging clone %p\n", rt);
1624 rt6_remove_exception(bucket, rt6_ex);
1625 return;
1626 }
1627 } else if (time_after(jiffies, rt->dst.expires)) {
1628 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1629 rt6_remove_exception(bucket, rt6_ex);
1630 return;
31afeb42
WW
1631 }
1632
1633 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1634 struct neighbour *neigh;
1635 __u8 neigh_flags = 0;
1636
1bfa26ff
ED
1637 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1638 if (neigh)
c757faa8 1639 neigh_flags = neigh->flags;
1bfa26ff 1640
c757faa8
WW
1641 if (!(neigh_flags & NTF_ROUTER)) {
1642 RT6_TRACE("purging route %p via non-router but gateway\n",
1643 rt);
1644 rt6_remove_exception(bucket, rt6_ex);
1645 return;
1646 }
1647 }
31afeb42 1648
c757faa8
WW
1649 gc_args->more++;
1650}
1651
1652void rt6_age_exceptions(struct rt6_info *rt,
1653 struct fib6_gc_args *gc_args,
1654 unsigned long now)
1655{
1656 struct rt6_exception_bucket *bucket;
1657 struct rt6_exception *rt6_ex;
1658 struct hlist_node *tmp;
1659 int i;
1660
1661 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1662 return;
1663
1bfa26ff
ED
1664 rcu_read_lock_bh();
1665 spin_lock(&rt6_exception_lock);
c757faa8
WW
1666 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1667 lockdep_is_held(&rt6_exception_lock));
1668
1669 if (bucket) {
1670 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1671 hlist_for_each_entry_safe(rt6_ex, tmp,
1672 &bucket->chain, hlist) {
1673 rt6_age_examine_exception(bucket, rt6_ex,
1674 gc_args, now);
1675 }
1676 bucket++;
1677 }
1678 }
1bfa26ff
ED
1679 spin_unlock(&rt6_exception_lock);
1680 rcu_read_unlock_bh();
c757faa8
WW
1681}
1682
9ff74384 1683struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
b75cc8f9
DA
1684 int oif, struct flowi6 *fl6,
1685 const struct sk_buff *skb, int flags)
1da177e4 1686{
367efcb9 1687 struct fib6_node *fn, *saved_fn;
2b760fcf 1688 struct rt6_info *rt, *rt_cache;
c71099ac 1689 int strict = 0;
1da177e4 1690
77d16f45 1691 strict |= flags & RT6_LOOKUP_F_IFACE;
d5d32e4b 1692 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
367efcb9
MKL
1693 if (net->ipv6.devconf_all->forwarding == 0)
1694 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1695
66f5d6ce 1696 rcu_read_lock();
1da177e4 1697
4c9483b2 1698 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1699 saved_fn = fn;
1da177e4 1700
ca254490
DA
1701 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1702 oif = 0;
1703
a3c00e46 1704redo_rt6_select:
8d1040e8 1705 rt = rt6_select(net, fn, oif, strict);
52bd4c0c 1706 if (rt->rt6i_nsiblings)
b4bac172 1707 rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict);
a3c00e46
MKL
1708 if (rt == net->ipv6.ip6_null_entry) {
1709 fn = fib6_backtrack(fn, &fl6->saddr);
1710 if (fn)
1711 goto redo_rt6_select;
367efcb9
MKL
1712 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1713 /* also consider unreachable route */
1714 strict &= ~RT6_LOOKUP_F_REACHABLE;
1715 fn = saved_fn;
1716 goto redo_rt6_select;
367efcb9 1717 }
a3c00e46
MKL
1718 }
1719
2b760fcf
WW
1720 /*Search through exception table */
1721 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
1722 if (rt_cache)
1723 rt = rt_cache;
fb9de91e 1724
d3843fe5 1725 if (rt == net->ipv6.ip6_null_entry) {
66f5d6ce 1726 rcu_read_unlock();
d3843fe5 1727 dst_hold(&rt->dst);
b65f164d 1728 trace_fib6_table_lookup(net, rt, table, fl6);
d3843fe5
WW
1729 return rt;
1730 } else if (rt->rt6i_flags & RTF_CACHE) {
1731 if (ip6_hold_safe(net, &rt, true)) {
1732 dst_use_noref(&rt->dst, jiffies);
1733 rt6_dst_from_metrics_check(rt);
1734 }
66f5d6ce 1735 rcu_read_unlock();
b65f164d 1736 trace_fib6_table_lookup(net, rt, table, fl6);
d52d3997 1737 return rt;
3da59bd9
MKL
1738 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1739 !(rt->rt6i_flags & RTF_GATEWAY))) {
1740 /* Create a RTF_CACHE clone which will not be
1741 * owned by the fib6 tree. It is for the special case where
1742 * the daddr in the skb during the neighbor look-up is different
1743 * from the fl6->daddr used to look-up route here.
1744 */
1745
1746 struct rt6_info *uncached_rt;
1747
d3843fe5
WW
1748 if (ip6_hold_safe(net, &rt, true)) {
1749 dst_use_noref(&rt->dst, jiffies);
1750 } else {
66f5d6ce 1751 rcu_read_unlock();
d3843fe5
WW
1752 uncached_rt = rt;
1753 goto uncached_rt_out;
1754 }
66f5d6ce 1755 rcu_read_unlock();
d52d3997 1756
3da59bd9
MKL
1757 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1758 dst_release(&rt->dst);
c71099ac 1759
1cfb71ee
WW
1760 if (uncached_rt) {
1761 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1762 * No need for another dst_hold()
1763 */
8d0b94af 1764 rt6_uncached_list_add(uncached_rt);
81eb8447 1765 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1766 } else {
3da59bd9 1767 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1768 dst_hold(&uncached_rt->dst);
1769 }
b811580d 1770
d3843fe5 1771uncached_rt_out:
b65f164d 1772 trace_fib6_table_lookup(net, uncached_rt, table, fl6);
3da59bd9 1773 return uncached_rt;
3da59bd9 1774
d52d3997
MKL
1775 } else {
1776 /* Get a percpu copy */
1777
1778 struct rt6_info *pcpu_rt;
1779
d3843fe5 1780 dst_use_noref(&rt->dst, jiffies);
951f788a 1781 local_bh_disable();
d52d3997 1782 pcpu_rt = rt6_get_pcpu_route(rt);
d52d3997 1783
951f788a 1784 if (!pcpu_rt) {
a94b9367
WW
1785 /* atomic_inc_not_zero() is needed when using rcu */
1786 if (atomic_inc_not_zero(&rt->rt6i_ref)) {
951f788a 1787 /* No dst_hold() on rt is needed because grabbing
a94b9367
WW
1788 * rt->rt6i_ref makes sure rt can't be released.
1789 */
a94b9367
WW
1790 pcpu_rt = rt6_make_pcpu_route(rt);
1791 rt6_release(rt);
1792 } else {
1793 /* rt is already removed from tree */
a94b9367
WW
1794 pcpu_rt = net->ipv6.ip6_null_entry;
1795 dst_hold(&pcpu_rt->dst);
1796 }
9c7370a1 1797 }
951f788a
ED
1798 local_bh_enable();
1799 rcu_read_unlock();
b65f164d 1800 trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
d52d3997
MKL
1801 return pcpu_rt;
1802 }
1da177e4 1803}
9ff74384 1804EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1805
b75cc8f9
DA
1806static struct rt6_info *ip6_pol_route_input(struct net *net,
1807 struct fib6_table *table,
1808 struct flowi6 *fl6,
1809 const struct sk_buff *skb,
1810 int flags)
4acad72d 1811{
b75cc8f9 1812 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
4acad72d
PE
1813}
1814
d409b847
MB
1815struct dst_entry *ip6_route_input_lookup(struct net *net,
1816 struct net_device *dev,
b75cc8f9
DA
1817 struct flowi6 *fl6,
1818 const struct sk_buff *skb,
1819 int flags)
72331bc0
SL
1820{
1821 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1822 flags |= RT6_LOOKUP_F_IFACE;
1823
b75cc8f9 1824 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
72331bc0 1825}
d409b847 1826EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1827
23aebdac 1828static void ip6_multipath_l3_keys(const struct sk_buff *skb,
5e5d6fed
RP
1829 struct flow_keys *keys,
1830 struct flow_keys *flkeys)
23aebdac
JS
1831{
1832 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1833 const struct ipv6hdr *key_iph = outer_iph;
5e5d6fed 1834 struct flow_keys *_flkeys = flkeys;
23aebdac
JS
1835 const struct ipv6hdr *inner_iph;
1836 const struct icmp6hdr *icmph;
1837 struct ipv6hdr _inner_iph;
1838
1839 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1840 goto out;
1841
1842 icmph = icmp6_hdr(skb);
1843 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1844 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1845 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1846 icmph->icmp6_type != ICMPV6_PARAMPROB)
1847 goto out;
1848
1849 inner_iph = skb_header_pointer(skb,
1850 skb_transport_offset(skb) + sizeof(*icmph),
1851 sizeof(_inner_iph), &_inner_iph);
1852 if (!inner_iph)
1853 goto out;
1854
1855 key_iph = inner_iph;
5e5d6fed 1856 _flkeys = NULL;
23aebdac 1857out:
5e5d6fed
RP
1858 if (_flkeys) {
1859 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
1860 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
1861 keys->tags.flow_label = _flkeys->tags.flow_label;
1862 keys->basic.ip_proto = _flkeys->basic.ip_proto;
1863 } else {
1864 keys->addrs.v6addrs.src = key_iph->saddr;
1865 keys->addrs.v6addrs.dst = key_iph->daddr;
1866 keys->tags.flow_label = ip6_flowinfo(key_iph);
1867 keys->basic.ip_proto = key_iph->nexthdr;
1868 }
23aebdac
JS
1869}
1870
1871/* if skb is set it will be used and fl6 can be NULL */
b4bac172
DA
1872u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1873 const struct sk_buff *skb, struct flow_keys *flkeys)
23aebdac
JS
1874{
1875 struct flow_keys hash_keys;
9a2a537a 1876 u32 mhash;
23aebdac 1877
bbfa047a 1878 switch (ip6_multipath_hash_policy(net)) {
b4bac172
DA
1879 case 0:
1880 memset(&hash_keys, 0, sizeof(hash_keys));
1881 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1882 if (skb) {
1883 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
1884 } else {
1885 hash_keys.addrs.v6addrs.src = fl6->saddr;
1886 hash_keys.addrs.v6addrs.dst = fl6->daddr;
1887 hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
1888 hash_keys.basic.ip_proto = fl6->flowi6_proto;
1889 }
1890 break;
1891 case 1:
1892 if (skb) {
1893 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
1894 struct flow_keys keys;
1895
1896 /* short-circuit if we already have L4 hash present */
1897 if (skb->l4_hash)
1898 return skb_get_hash_raw(skb) >> 1;
1899
1900 memset(&hash_keys, 0, sizeof(hash_keys));
1901
1902 if (!flkeys) {
1903 skb_flow_dissect_flow_keys(skb, &keys, flag);
1904 flkeys = &keys;
1905 }
1906 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1907 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
1908 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
1909 hash_keys.ports.src = flkeys->ports.src;
1910 hash_keys.ports.dst = flkeys->ports.dst;
1911 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
1912 } else {
1913 memset(&hash_keys, 0, sizeof(hash_keys));
1914 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1915 hash_keys.addrs.v6addrs.src = fl6->saddr;
1916 hash_keys.addrs.v6addrs.dst = fl6->daddr;
1917 hash_keys.ports.src = fl6->fl6_sport;
1918 hash_keys.ports.dst = fl6->fl6_dport;
1919 hash_keys.basic.ip_proto = fl6->flowi6_proto;
1920 }
1921 break;
23aebdac 1922 }
9a2a537a 1923 mhash = flow_hash_from_keys(&hash_keys);
23aebdac 1924
9a2a537a 1925 return mhash >> 1;
23aebdac
JS
1926}
1927
c71099ac
TG
1928void ip6_route_input(struct sk_buff *skb)
1929{
b71d1d42 1930 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1931 struct net *net = dev_net(skb->dev);
adaa70bb 1932 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 1933 struct ip_tunnel_info *tun_info;
4c9483b2 1934 struct flowi6 fl6 = {
e0d56fdd 1935 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
1936 .daddr = iph->daddr,
1937 .saddr = iph->saddr,
6502ca52 1938 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1939 .flowi6_mark = skb->mark,
1940 .flowi6_proto = iph->nexthdr,
c71099ac 1941 };
5e5d6fed 1942 struct flow_keys *flkeys = NULL, _flkeys;
adaa70bb 1943
904af04d 1944 tun_info = skb_tunnel_info(skb);
46fa062a 1945 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 1946 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
5e5d6fed
RP
1947
1948 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
1949 flkeys = &_flkeys;
1950
23aebdac 1951 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
b4bac172 1952 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
06e9d040 1953 skb_dst_drop(skb);
b75cc8f9
DA
1954 skb_dst_set(skb,
1955 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
c71099ac
TG
1956}
1957
b75cc8f9
DA
1958static struct rt6_info *ip6_pol_route_output(struct net *net,
1959 struct fib6_table *table,
1960 struct flowi6 *fl6,
1961 const struct sk_buff *skb,
1962 int flags)
1da177e4 1963{
b75cc8f9 1964 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
c71099ac
TG
1965}
1966
6f21c96a
PA
1967struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1968 struct flowi6 *fl6, int flags)
c71099ac 1969{
d46a9d67 1970 bool any_src;
c71099ac 1971
4c1feac5
DA
1972 if (rt6_need_strict(&fl6->daddr)) {
1973 struct dst_entry *dst;
1974
1975 dst = l3mdev_link_scope_lookup(net, fl6);
1976 if (dst)
1977 return dst;
1978 }
ca254490 1979
1fb9489b 1980 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1981
d46a9d67 1982 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 1983 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 1984 (fl6->flowi6_oif && any_src))
77d16f45 1985 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1986
d46a9d67 1987 if (!any_src)
adaa70bb 1988 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1989 else if (sk)
1990 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1991
b75cc8f9 1992 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
1da177e4 1993}
6f21c96a 1994EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 1995
2774c131 1996struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1997{
5c1e6aa3 1998 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 1999 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
2000 struct dst_entry *new = NULL;
2001
1dbe3252 2002 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 2003 DST_OBSOLETE_DEAD, 0);
14e50e57 2004 if (rt) {
0a1f5962 2005 rt6_info_init(rt);
81eb8447 2006 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 2007
0a1f5962 2008 new = &rt->dst;
14e50e57 2009 new->__use = 1;
352e512c 2010 new->input = dst_discard;
ede2059d 2011 new->output = dst_discard_out;
14e50e57 2012
0a1f5962 2013 dst_copy_metrics(new, &ort->dst);
14e50e57 2014
1dbe3252 2015 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 2016 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 2017 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
2018 rt->rt6i_metric = 0;
2019
2020 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2021#ifdef CONFIG_IPV6_SUBTREES
2022 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2023#endif
14e50e57
DM
2024 }
2025
69ead7af
DM
2026 dst_release(dst_orig);
2027 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 2028}
14e50e57 2029
1da177e4
LT
2030/*
2031 * Destination cache support functions
2032 */
2033
4b32b5ad
MKL
2034static void rt6_dst_from_metrics_check(struct rt6_info *rt)
2035{
3a2232e9
DM
2036 if (rt->from &&
2037 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(&rt->from->dst))
2038 dst_init_metrics(&rt->dst, dst_metrics_ptr(&rt->from->dst), true);
4b32b5ad
MKL
2039}
2040
3da59bd9
MKL
2041static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
2042{
36143645 2043 u32 rt_cookie = 0;
c5cff856
WW
2044
2045 if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
3da59bd9
MKL
2046 return NULL;
2047
2048 if (rt6_check_expired(rt))
2049 return NULL;
2050
2051 return &rt->dst;
2052}
2053
2054static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
2055{
5973fb1e
MKL
2056 if (!__rt6_check_expired(rt) &&
2057 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
3a2232e9 2058 rt6_check(rt->from, cookie))
3da59bd9
MKL
2059 return &rt->dst;
2060 else
2061 return NULL;
2062}
2063
1da177e4
LT
2064static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2065{
2066 struct rt6_info *rt;
2067
2068 rt = (struct rt6_info *) dst;
2069
6f3118b5
ND
2070 /* All IPV6 dsts are created with ->obsolete set to the value
2071 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2072 * into this function always.
2073 */
e3bc10bd 2074
4b32b5ad
MKL
2075 rt6_dst_from_metrics_check(rt);
2076
02bcf4e0 2077 if (rt->rt6i_flags & RTF_PCPU ||
3a2232e9 2078 (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
3da59bd9
MKL
2079 return rt6_dst_from_check(rt, cookie);
2080 else
2081 return rt6_check(rt, cookie);
1da177e4
LT
2082}
2083
2084static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2085{
2086 struct rt6_info *rt = (struct rt6_info *) dst;
2087
2088 if (rt) {
54c1a859
YH
2089 if (rt->rt6i_flags & RTF_CACHE) {
2090 if (rt6_check_expired(rt)) {
2091 ip6_del_rt(rt);
2092 dst = NULL;
2093 }
2094 } else {
1da177e4 2095 dst_release(dst);
54c1a859
YH
2096 dst = NULL;
2097 }
1da177e4 2098 }
54c1a859 2099 return dst;
1da177e4
LT
2100}
2101
2102static void ip6_link_failure(struct sk_buff *skb)
2103{
2104 struct rt6_info *rt;
2105
3ffe533c 2106 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2107
adf30907 2108 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2109 if (rt) {
1eb4f758 2110 if (rt->rt6i_flags & RTF_CACHE) {
ad65a2f0
WW
2111 if (dst_hold_safe(&rt->dst))
2112 ip6_del_rt(rt);
c5cff856
WW
2113 } else {
2114 struct fib6_node *fn;
2115
2116 rcu_read_lock();
2117 fn = rcu_dereference(rt->rt6i_node);
2118 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2119 fn->fn_sernum = -1;
2120 rcu_read_unlock();
1eb4f758 2121 }
1da177e4
LT
2122 }
2123}
2124
45e4fd26
MKL
2125static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2126{
2127 struct net *net = dev_net(rt->dst.dev);
2128
2129 rt->rt6i_flags |= RTF_MODIFIED;
2130 rt->rt6i_pmtu = mtu;
2131 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2132}
2133
0d3f6d29
MKL
2134static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2135{
2136 return !(rt->rt6i_flags & RTF_CACHE) &&
4e587ea7
WW
2137 (rt->rt6i_flags & RTF_PCPU ||
2138 rcu_access_pointer(rt->rt6i_node));
0d3f6d29
MKL
2139}
2140
45e4fd26
MKL
2141static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2142 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2143{
0dec879f 2144 const struct in6_addr *daddr, *saddr;
67ba4152 2145 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2146
45e4fd26
MKL
2147 if (rt6->rt6i_flags & RTF_LOCAL)
2148 return;
81aded24 2149
19bda36c
XL
2150 if (dst_metric_locked(dst, RTAX_MTU))
2151 return;
2152
0dec879f
JA
2153 if (iph) {
2154 daddr = &iph->daddr;
2155 saddr = &iph->saddr;
2156 } else if (sk) {
2157 daddr = &sk->sk_v6_daddr;
2158 saddr = &inet6_sk(sk)->saddr;
2159 } else {
2160 daddr = NULL;
2161 saddr = NULL;
2162 }
2163 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2164 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2165 if (mtu >= dst_mtu(dst))
2166 return;
9d289715 2167
0d3f6d29 2168 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2169 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2170 /* update rt6_ex->stamp for cache */
2171 if (rt6->rt6i_flags & RTF_CACHE)
2172 rt6_update_exception_stamp_rt(rt6);
0dec879f 2173 } else if (daddr) {
45e4fd26
MKL
2174 struct rt6_info *nrt6;
2175
45e4fd26
MKL
2176 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
2177 if (nrt6) {
2178 rt6_do_update_pmtu(nrt6, mtu);
2b760fcf
WW
2179 if (rt6_insert_exception(nrt6, rt6))
2180 dst_release_immediate(&nrt6->dst);
45e4fd26 2181 }
1da177e4
LT
2182 }
2183}
2184
45e4fd26
MKL
2185static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2186 struct sk_buff *skb, u32 mtu)
2187{
2188 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2189}
2190
42ae66c8 2191void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2192 int oif, u32 mark, kuid_t uid)
81aded24
DM
2193{
2194 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2195 struct dst_entry *dst;
2196 struct flowi6 fl6;
2197
2198 memset(&fl6, 0, sizeof(fl6));
2199 fl6.flowi6_oif = oif;
1b3c61dc 2200 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
2201 fl6.daddr = iph->daddr;
2202 fl6.saddr = iph->saddr;
6502ca52 2203 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2204 fl6.flowi6_uid = uid;
81aded24
DM
2205
2206 dst = ip6_route_output(net, NULL, &fl6);
2207 if (!dst->error)
45e4fd26 2208 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2209 dst_release(dst);
2210}
2211EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2212
2213void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2214{
33c162a9
MKL
2215 struct dst_entry *dst;
2216
81aded24 2217 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 2218 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2219
2220 dst = __sk_dst_get(sk);
2221 if (!dst || !dst->obsolete ||
2222 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2223 return;
2224
2225 bh_lock_sock(sk);
2226 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2227 ip6_datagram_dst_update(sk, false);
2228 bh_unlock_sock(sk);
81aded24
DM
2229}
2230EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2231
b55b76b2
DJ
2232/* Handle redirects */
2233struct ip6rd_flowi {
2234 struct flowi6 fl6;
2235 struct in6_addr gateway;
2236};
2237
2238static struct rt6_info *__ip6_route_redirect(struct net *net,
2239 struct fib6_table *table,
2240 struct flowi6 *fl6,
b75cc8f9 2241 const struct sk_buff *skb,
b55b76b2
DJ
2242 int flags)
2243{
2244 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2b760fcf 2245 struct rt6_info *rt, *rt_cache;
b55b76b2
DJ
2246 struct fib6_node *fn;
2247
2248 /* Get the "current" route for this destination and
67c408cf 2249 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2250 *
2251 * RFC 4861 specifies that redirects should only be
2252 * accepted if they come from the nexthop to the target.
2253 * Due to the way the routes are chosen, this notion
2254 * is a bit fuzzy and one might need to check all possible
2255 * routes.
2256 */
2257
66f5d6ce 2258 rcu_read_lock();
b55b76b2
DJ
2259 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2260restart:
66f5d6ce 2261 for_each_fib6_node_rt_rcu(fn) {
8067bb8c
IS
2262 if (rt->rt6i_nh_flags & RTNH_F_DEAD)
2263 continue;
b55b76b2
DJ
2264 if (rt6_check_expired(rt))
2265 continue;
2266 if (rt->dst.error)
2267 break;
2268 if (!(rt->rt6i_flags & RTF_GATEWAY))
2269 continue;
2270 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
2271 continue;
2b760fcf
WW
2272 /* rt_cache's gateway might be different from its 'parent'
2273 * in the case of an ip redirect.
2274 * So we keep searching in the exception table if the gateway
2275 * is different.
2276 */
2277 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) {
2278 rt_cache = rt6_find_cached_rt(rt,
2279 &fl6->daddr,
2280 &fl6->saddr);
2281 if (rt_cache &&
2282 ipv6_addr_equal(&rdfl->gateway,
2283 &rt_cache->rt6i_gateway)) {
2284 rt = rt_cache;
2285 break;
2286 }
b55b76b2 2287 continue;
2b760fcf 2288 }
b55b76b2
DJ
2289 break;
2290 }
2291
2292 if (!rt)
2293 rt = net->ipv6.ip6_null_entry;
2294 else if (rt->dst.error) {
2295 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2296 goto out;
2297 }
2298
2299 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
2300 fn = fib6_backtrack(fn, &fl6->saddr);
2301 if (fn)
2302 goto restart;
b55b76b2 2303 }
a3c00e46 2304
b0a1ba59 2305out:
d3843fe5 2306 ip6_hold_safe(net, &rt, true);
b55b76b2 2307
66f5d6ce 2308 rcu_read_unlock();
b55b76b2 2309
b65f164d 2310 trace_fib6_table_lookup(net, rt, table, fl6);
b55b76b2
DJ
2311 return rt;
2312};
2313
2314static struct dst_entry *ip6_route_redirect(struct net *net,
b75cc8f9
DA
2315 const struct flowi6 *fl6,
2316 const struct sk_buff *skb,
2317 const struct in6_addr *gateway)
b55b76b2
DJ
2318{
2319 int flags = RT6_LOOKUP_F_HAS_SADDR;
2320 struct ip6rd_flowi rdfl;
2321
2322 rdfl.fl6 = *fl6;
2323 rdfl.gateway = *gateway;
2324
b75cc8f9 2325 return fib6_rule_lookup(net, &rdfl.fl6, skb,
b55b76b2
DJ
2326 flags, __ip6_route_redirect);
2327}
2328
e2d118a1
LC
2329void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2330 kuid_t uid)
3a5ad2ee
DM
2331{
2332 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2333 struct dst_entry *dst;
2334 struct flowi6 fl6;
2335
2336 memset(&fl6, 0, sizeof(fl6));
e374c618 2337 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
2338 fl6.flowi6_oif = oif;
2339 fl6.flowi6_mark = mark;
3a5ad2ee
DM
2340 fl6.daddr = iph->daddr;
2341 fl6.saddr = iph->saddr;
6502ca52 2342 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2343 fl6.flowi6_uid = uid;
3a5ad2ee 2344
b75cc8f9 2345 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
b55b76b2 2346 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2347 dst_release(dst);
2348}
2349EXPORT_SYMBOL_GPL(ip6_redirect);
2350
c92a59ec
DJ
2351void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2352 u32 mark)
2353{
2354 const struct ipv6hdr *iph = ipv6_hdr(skb);
2355 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2356 struct dst_entry *dst;
2357 struct flowi6 fl6;
2358
2359 memset(&fl6, 0, sizeof(fl6));
e374c618 2360 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
2361 fl6.flowi6_oif = oif;
2362 fl6.flowi6_mark = mark;
c92a59ec
DJ
2363 fl6.daddr = msg->dest;
2364 fl6.saddr = iph->daddr;
e2d118a1 2365 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 2366
b75cc8f9 2367 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
b55b76b2 2368 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2369 dst_release(dst);
2370}
2371
3a5ad2ee
DM
2372void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2373{
e2d118a1
LC
2374 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2375 sk->sk_uid);
3a5ad2ee
DM
2376}
2377EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2378
0dbaee3b 2379static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2380{
0dbaee3b
DM
2381 struct net_device *dev = dst->dev;
2382 unsigned int mtu = dst_mtu(dst);
2383 struct net *net = dev_net(dev);
2384
1da177e4
LT
2385 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2386
5578689a
DL
2387 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2388 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2389
2390 /*
1ab1457c
YH
2391 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2392 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2393 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2394 * rely only on pmtu discovery"
2395 */
2396 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2397 mtu = IPV6_MAXPLEN;
2398 return mtu;
2399}
2400
ebb762f2 2401static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2402{
4b32b5ad
MKL
2403 const struct rt6_info *rt = (const struct rt6_info *)dst;
2404 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 2405 struct inet6_dev *idev;
618f9bc7 2406
4b32b5ad
MKL
2407 if (mtu)
2408 goto out;
2409
2410 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2411 if (mtu)
30f78d8e 2412 goto out;
618f9bc7
SK
2413
2414 mtu = IPV6_MIN_MTU;
d33e4553
DM
2415
2416 rcu_read_lock();
2417 idev = __in6_dev_get(dst->dev);
2418 if (idev)
2419 mtu = idev->cnf.mtu6;
2420 rcu_read_unlock();
2421
30f78d8e 2422out:
14972cbd
RP
2423 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2424
2425 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2426}
2427
3b00944c 2428struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2429 struct flowi6 *fl6)
1da177e4 2430{
87a11578 2431 struct dst_entry *dst;
1da177e4
LT
2432 struct rt6_info *rt;
2433 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2434 struct net *net = dev_net(dev);
1da177e4 2435
38308473 2436 if (unlikely(!idev))
122bdf67 2437 return ERR_PTR(-ENODEV);
1da177e4 2438
ad706862 2439 rt = ip6_dst_alloc(net, dev, 0);
38308473 2440 if (unlikely(!rt)) {
1da177e4 2441 in6_dev_put(idev);
87a11578 2442 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2443 goto out;
2444 }
2445
8e2ec639 2446 rt->dst.flags |= DST_HOST;
588753f1 2447 rt->dst.input = ip6_input;
8e2ec639 2448 rt->dst.output = ip6_output;
550bab42 2449 rt->rt6i_gateway = fl6->daddr;
87a11578 2450 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2451 rt->rt6i_dst.plen = 128;
2452 rt->rt6i_idev = idev;
14edd87d 2453 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2454
4c981e28 2455 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2456 * do proper release of the net_device
2457 */
2458 rt6_uncached_list_add(rt);
81eb8447 2459 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2460
87a11578
DM
2461 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2462
1da177e4 2463out:
87a11578 2464 return dst;
1da177e4
LT
2465}
2466
569d3645 2467static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2468{
86393e52 2469 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2470 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2471 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2472 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2473 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2474 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2475 int entries;
7019b78e 2476
fc66f95c 2477 entries = dst_entries_get_fast(ops);
49a18d86 2478 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2479 entries <= rt_max_size)
1da177e4
LT
2480 goto out;
2481
6891a346 2482 net->ipv6.ip6_rt_gc_expire++;
14956643 2483 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2484 entries = dst_entries_get_slow(ops);
2485 if (entries < ops->gc_thresh)
7019b78e 2486 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2487out:
7019b78e 2488 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2489 return entries > rt_max_size;
1da177e4
LT
2490}
2491
e715b6d3
FW
2492static int ip6_convert_metrics(struct mx6_config *mxc,
2493 const struct fib6_config *cfg)
2494{
6670e152 2495 struct net *net = cfg->fc_nlinfo.nl_net;
c3a8d947 2496 bool ecn_ca = false;
e715b6d3
FW
2497 struct nlattr *nla;
2498 int remaining;
2499 u32 *mp;
2500
63159f29 2501 if (!cfg->fc_mx)
e715b6d3
FW
2502 return 0;
2503
2504 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
2505 if (unlikely(!mp))
2506 return -ENOMEM;
2507
2508 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
2509 int type = nla_type(nla);
1bb14807 2510 u32 val;
e715b6d3 2511
1bb14807
DB
2512 if (!type)
2513 continue;
2514 if (unlikely(type > RTAX_MAX))
2515 goto err;
ea697639 2516
1bb14807
DB
2517 if (type == RTAX_CC_ALGO) {
2518 char tmp[TCP_CA_NAME_MAX];
e715b6d3 2519
1bb14807 2520 nla_strlcpy(tmp, nla, sizeof(tmp));
6670e152 2521 val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca);
1bb14807
DB
2522 if (val == TCP_CA_UNSPEC)
2523 goto err;
2524 } else {
2525 val = nla_get_u32(nla);
e715b6d3 2526 }
626abd59
PA
2527 if (type == RTAX_HOPLIMIT && val > 255)
2528 val = 255;
b8d3e416
DB
2529 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
2530 goto err;
1bb14807
DB
2531
2532 mp[type - 1] = val;
2533 __set_bit(type - 1, mxc->mx_valid);
e715b6d3
FW
2534 }
2535
c3a8d947
DB
2536 if (ecn_ca) {
2537 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
2538 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
2539 }
e715b6d3 2540
c3a8d947 2541 mxc->mx = mp;
e715b6d3
FW
2542 return 0;
2543 err:
2544 kfree(mp);
2545 return -EINVAL;
2546}
1da177e4 2547
8c14586f
DA
2548static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2549 struct fib6_config *cfg,
f4797b33
DA
2550 const struct in6_addr *gw_addr,
2551 u32 tbid, int flags)
8c14586f
DA
2552{
2553 struct flowi6 fl6 = {
2554 .flowi6_oif = cfg->fc_ifindex,
2555 .daddr = *gw_addr,
2556 .saddr = cfg->fc_prefsrc,
2557 };
2558 struct fib6_table *table;
2559 struct rt6_info *rt;
8c14586f 2560
f4797b33 2561 table = fib6_get_table(net, tbid);
8c14586f
DA
2562 if (!table)
2563 return NULL;
2564
2565 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2566 flags |= RT6_LOOKUP_F_HAS_SADDR;
2567
f4797b33 2568 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
b75cc8f9 2569 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
8c14586f
DA
2570
2571 /* if table lookup failed, fall back to full lookup */
2572 if (rt == net->ipv6.ip6_null_entry) {
2573 ip6_rt_put(rt);
2574 rt = NULL;
2575 }
2576
2577 return rt;
2578}
2579
fc1e64e1
DA
2580static int ip6_route_check_nh_onlink(struct net *net,
2581 struct fib6_config *cfg,
9fbb704c 2582 const struct net_device *dev,
fc1e64e1
DA
2583 struct netlink_ext_ack *extack)
2584{
44750f84 2585 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
fc1e64e1
DA
2586 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2587 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2588 struct rt6_info *grt;
2589 int err;
2590
2591 err = 0;
2592 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2593 if (grt) {
58e354c0
DA
2594 if (!grt->dst.error &&
2595 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
44750f84
DA
2596 NL_SET_ERR_MSG(extack,
2597 "Nexthop has invalid gateway or device mismatch");
fc1e64e1
DA
2598 err = -EINVAL;
2599 }
2600
2601 ip6_rt_put(grt);
2602 }
2603
2604 return err;
2605}
2606
1edce99f
DA
2607static int ip6_route_check_nh(struct net *net,
2608 struct fib6_config *cfg,
2609 struct net_device **_dev,
2610 struct inet6_dev **idev)
2611{
2612 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2613 struct net_device *dev = _dev ? *_dev : NULL;
2614 struct rt6_info *grt = NULL;
2615 int err = -EHOSTUNREACH;
2616
2617 if (cfg->fc_table) {
f4797b33
DA
2618 int flags = RT6_LOOKUP_F_IFACE;
2619
2620 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2621 cfg->fc_table, flags);
1edce99f
DA
2622 if (grt) {
2623 if (grt->rt6i_flags & RTF_GATEWAY ||
2624 (dev && dev != grt->dst.dev)) {
2625 ip6_rt_put(grt);
2626 grt = NULL;
2627 }
2628 }
2629 }
2630
2631 if (!grt)
b75cc8f9 2632 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
1edce99f
DA
2633
2634 if (!grt)
2635 goto out;
2636
2637 if (dev) {
2638 if (dev != grt->dst.dev) {
2639 ip6_rt_put(grt);
2640 goto out;
2641 }
2642 } else {
2643 *_dev = dev = grt->dst.dev;
2644 *idev = grt->rt6i_idev;
2645 dev_hold(dev);
2646 in6_dev_hold(grt->rt6i_idev);
2647 }
2648
2649 if (!(grt->rt6i_flags & RTF_GATEWAY))
2650 err = 0;
2651
2652 ip6_rt_put(grt);
2653
2654out:
2655 return err;
2656}
2657
9fbb704c
DA
2658static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2659 struct net_device **_dev, struct inet6_dev **idev,
2660 struct netlink_ext_ack *extack)
2661{
2662 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2663 int gwa_type = ipv6_addr_type(gw_addr);
232378e8 2664 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
9fbb704c 2665 const struct net_device *dev = *_dev;
232378e8 2666 bool need_addr_check = !dev;
9fbb704c
DA
2667 int err = -EINVAL;
2668
2669 /* if gw_addr is local we will fail to detect this in case
2670 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2671 * will return already-added prefix route via interface that
2672 * prefix route was assigned to, which might be non-loopback.
2673 */
232378e8
DA
2674 if (dev &&
2675 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2676 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
9fbb704c
DA
2677 goto out;
2678 }
2679
2680 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2681 /* IPv6 strictly inhibits using not link-local
2682 * addresses as nexthop address.
2683 * Otherwise, router will not able to send redirects.
2684 * It is very good, but in some (rare!) circumstances
2685 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2686 * some exceptions. --ANK
2687 * We allow IPv4-mapped nexthops to support RFC4798-type
2688 * addressing
2689 */
2690 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2691 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2692 goto out;
2693 }
2694
2695 if (cfg->fc_flags & RTNH_F_ONLINK)
2696 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2697 else
2698 err = ip6_route_check_nh(net, cfg, _dev, idev);
2699
2700 if (err)
2701 goto out;
2702 }
2703
2704 /* reload in case device was changed */
2705 dev = *_dev;
2706
2707 err = -EINVAL;
2708 if (!dev) {
2709 NL_SET_ERR_MSG(extack, "Egress device not specified");
2710 goto out;
2711 } else if (dev->flags & IFF_LOOPBACK) {
2712 NL_SET_ERR_MSG(extack,
2713 "Egress device can not be loopback device for this route");
2714 goto out;
2715 }
232378e8
DA
2716
2717 /* if we did not check gw_addr above, do so now that the
2718 * egress device has been resolved.
2719 */
2720 if (need_addr_check &&
2721 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2722 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2723 goto out;
2724 }
2725
9fbb704c
DA
2726 err = 0;
2727out:
2728 return err;
2729}
2730
333c4301
DA
2731static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
2732 struct netlink_ext_ack *extack)
1da177e4 2733{
5578689a 2734 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
2735 struct rt6_info *rt = NULL;
2736 struct net_device *dev = NULL;
2737 struct inet6_dev *idev = NULL;
c71099ac 2738 struct fib6_table *table;
1da177e4 2739 int addr_type;
8c5b83f0 2740 int err = -EINVAL;
1da177e4 2741
557c44be 2742 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
2743 if (cfg->fc_flags & RTF_PCPU) {
2744 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 2745 goto out;
d5d531cb 2746 }
557c44be 2747
2ea2352e
WW
2748 /* RTF_CACHE is an internal flag; can not be set by userspace */
2749 if (cfg->fc_flags & RTF_CACHE) {
2750 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2751 goto out;
2752 }
2753
d5d531cb
DA
2754 if (cfg->fc_dst_len > 128) {
2755 NL_SET_ERR_MSG(extack, "Invalid prefix length");
2756 goto out;
2757 }
2758 if (cfg->fc_src_len > 128) {
2759 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 2760 goto out;
d5d531cb 2761 }
1da177e4 2762#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
2763 if (cfg->fc_src_len) {
2764 NL_SET_ERR_MSG(extack,
2765 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 2766 goto out;
d5d531cb 2767 }
1da177e4 2768#endif
86872cb5 2769 if (cfg->fc_ifindex) {
1da177e4 2770 err = -ENODEV;
5578689a 2771 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
2772 if (!dev)
2773 goto out;
2774 idev = in6_dev_get(dev);
2775 if (!idev)
2776 goto out;
2777 }
2778
86872cb5
TG
2779 if (cfg->fc_metric == 0)
2780 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 2781
fc1e64e1
DA
2782 if (cfg->fc_flags & RTNH_F_ONLINK) {
2783 if (!dev) {
2784 NL_SET_ERR_MSG(extack,
2785 "Nexthop device required for onlink");
2786 err = -ENODEV;
2787 goto out;
2788 }
2789
2790 if (!(dev->flags & IFF_UP)) {
2791 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2792 err = -ENETDOWN;
2793 goto out;
2794 }
2795 }
2796
d71314b4 2797 err = -ENOBUFS;
38308473
DM
2798 if (cfg->fc_nlinfo.nlh &&
2799 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 2800 table = fib6_get_table(net, cfg->fc_table);
38308473 2801 if (!table) {
f3213831 2802 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
2803 table = fib6_new_table(net, cfg->fc_table);
2804 }
2805 } else {
2806 table = fib6_new_table(net, cfg->fc_table);
2807 }
38308473
DM
2808
2809 if (!table)
c71099ac 2810 goto out;
c71099ac 2811
ad706862
MKL
2812 rt = ip6_dst_alloc(net, NULL,
2813 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1da177e4 2814
38308473 2815 if (!rt) {
1da177e4
LT
2816 err = -ENOMEM;
2817 goto out;
2818 }
2819
1716a961
G
2820 if (cfg->fc_flags & RTF_EXPIRES)
2821 rt6_set_expires(rt, jiffies +
2822 clock_t_to_jiffies(cfg->fc_expires));
2823 else
2824 rt6_clean_expires(rt);
1da177e4 2825
86872cb5
TG
2826 if (cfg->fc_protocol == RTPROT_UNSPEC)
2827 cfg->fc_protocol = RTPROT_BOOT;
2828 rt->rt6i_protocol = cfg->fc_protocol;
2829
2830 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
2831
2832 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 2833 rt->dst.input = ip6_mc_input;
ab79ad14
2834 else if (cfg->fc_flags & RTF_LOCAL)
2835 rt->dst.input = ip6_input;
1da177e4 2836 else
d8d1f30b 2837 rt->dst.input = ip6_forward;
1da177e4 2838
d8d1f30b 2839 rt->dst.output = ip6_output;
1da177e4 2840
19e42e45
RP
2841 if (cfg->fc_encap) {
2842 struct lwtunnel_state *lwtstate;
2843
30357d7d 2844 err = lwtunnel_build_state(cfg->fc_encap_type,
127eb7cd 2845 cfg->fc_encap, AF_INET6, cfg,
9ae28727 2846 &lwtstate, extack);
19e42e45
RP
2847 if (err)
2848 goto out;
61adedf3 2849 rt->dst.lwtstate = lwtstate_get(lwtstate);
9942895b 2850 lwtunnel_set_redirect(&rt->dst);
19e42e45
RP
2851 }
2852
86872cb5
TG
2853 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
2854 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 2855 if (rt->rt6i_dst.plen == 128)
e5fd387a 2856 rt->dst.flags |= DST_HOST;
e5fd387a 2857
1da177e4 2858#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
2859 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
2860 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
2861#endif
2862
86872cb5 2863 rt->rt6i_metric = cfg->fc_metric;
398958ae 2864 rt->rt6i_nh_weight = 1;
1da177e4
LT
2865
2866 /* We cannot add true routes via loopback here,
2867 they would result in kernel looping; promote them to reject routes
2868 */
86872cb5 2869 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
2870 (dev && (dev->flags & IFF_LOOPBACK) &&
2871 !(addr_type & IPV6_ADDR_LOOPBACK) &&
2872 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 2873 /* hold loopback dev/idev if we haven't done so. */
5578689a 2874 if (dev != net->loopback_dev) {
1da177e4
LT
2875 if (dev) {
2876 dev_put(dev);
2877 in6_dev_put(idev);
2878 }
5578689a 2879 dev = net->loopback_dev;
1da177e4
LT
2880 dev_hold(dev);
2881 idev = in6_dev_get(dev);
2882 if (!idev) {
2883 err = -ENODEV;
2884 goto out;
2885 }
2886 }
1da177e4 2887 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
2888 switch (cfg->fc_type) {
2889 case RTN_BLACKHOLE:
2890 rt->dst.error = -EINVAL;
ede2059d 2891 rt->dst.output = dst_discard_out;
7150aede 2892 rt->dst.input = dst_discard;
ef2c7d7b
ND
2893 break;
2894 case RTN_PROHIBIT:
2895 rt->dst.error = -EACCES;
7150aede
K
2896 rt->dst.output = ip6_pkt_prohibit_out;
2897 rt->dst.input = ip6_pkt_prohibit;
ef2c7d7b 2898 break;
b4949ab2 2899 case RTN_THROW:
0315e382 2900 case RTN_UNREACHABLE:
ef2c7d7b 2901 default:
7150aede 2902 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
0315e382
NF
2903 : (cfg->fc_type == RTN_UNREACHABLE)
2904 ? -EHOSTUNREACH : -ENETUNREACH;
7150aede
K
2905 rt->dst.output = ip6_pkt_discard_out;
2906 rt->dst.input = ip6_pkt_discard;
ef2c7d7b
ND
2907 break;
2908 }
1da177e4
LT
2909 goto install_route;
2910 }
2911
86872cb5 2912 if (cfg->fc_flags & RTF_GATEWAY) {
9fbb704c
DA
2913 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
2914 if (err)
48ed7b26 2915 goto out;
1da177e4 2916
9fbb704c 2917 rt->rt6i_gateway = cfg->fc_gateway;
1da177e4
LT
2918 }
2919
2920 err = -ENODEV;
38308473 2921 if (!dev)
1da177e4
LT
2922 goto out;
2923
428604fb
LB
2924 if (idev->cnf.disable_ipv6) {
2925 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
2926 err = -EACCES;
2927 goto out;
2928 }
2929
955ec4cb
DA
2930 if (!(dev->flags & IFF_UP)) {
2931 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2932 err = -ENETDOWN;
2933 goto out;
2934 }
2935
c3968a85
DW
2936 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2937 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 2938 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
2939 err = -EINVAL;
2940 goto out;
2941 }
4e3fd7a0 2942 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
2943 rt->rt6i_prefsrc.plen = 128;
2944 } else
2945 rt->rt6i_prefsrc.plen = 0;
2946
86872cb5 2947 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
2948
2949install_route:
5609b80a
IS
2950 if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
2951 !netif_carrier_ok(dev))
2952 rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
fc1e64e1 2953 rt->rt6i_nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
d8d1f30b 2954 rt->dst.dev = dev;
1da177e4 2955 rt->rt6i_idev = idev;
c71099ac 2956 rt->rt6i_table = table;
63152fc0 2957
c346dca1 2958 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 2959
8c5b83f0 2960 return rt;
6b9ea5a6
RP
2961out:
2962 if (dev)
2963 dev_put(dev);
2964 if (idev)
2965 in6_dev_put(idev);
587fea74
WW
2966 if (rt)
2967 dst_release_immediate(&rt->dst);
6b9ea5a6 2968
8c5b83f0 2969 return ERR_PTR(err);
6b9ea5a6
RP
2970}
2971
333c4301
DA
2972int ip6_route_add(struct fib6_config *cfg,
2973 struct netlink_ext_ack *extack)
6b9ea5a6
RP
2974{
2975 struct mx6_config mxc = { .mx = NULL, };
8c5b83f0 2976 struct rt6_info *rt;
6b9ea5a6
RP
2977 int err;
2978
333c4301 2979 rt = ip6_route_info_create(cfg, extack);
8c5b83f0
RP
2980 if (IS_ERR(rt)) {
2981 err = PTR_ERR(rt);
2982 rt = NULL;
6b9ea5a6 2983 goto out;
8c5b83f0 2984 }
6b9ea5a6 2985
e715b6d3
FW
2986 err = ip6_convert_metrics(&mxc, cfg);
2987 if (err)
2988 goto out;
1da177e4 2989
333c4301 2990 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
e715b6d3
FW
2991
2992 kfree(mxc.mx);
6b9ea5a6 2993
e715b6d3 2994 return err;
1da177e4 2995out:
587fea74
WW
2996 if (rt)
2997 dst_release_immediate(&rt->dst);
6b9ea5a6 2998
1da177e4
LT
2999 return err;
3000}
3001
86872cb5 3002static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
3003{
3004 int err;
c71099ac 3005 struct fib6_table *table;
d1918542 3006 struct net *net = dev_net(rt->dst.dev);
1da177e4 3007
a4c2fd7f 3008 if (rt == net->ipv6.ip6_null_entry) {
6825a26c
G
3009 err = -ENOENT;
3010 goto out;
3011 }
6c813a72 3012
c71099ac 3013 table = rt->rt6i_table;
66f5d6ce 3014 spin_lock_bh(&table->tb6_lock);
86872cb5 3015 err = fib6_del(rt, info);
66f5d6ce 3016 spin_unlock_bh(&table->tb6_lock);
1da177e4 3017
6825a26c 3018out:
94e187c0 3019 ip6_rt_put(rt);
1da177e4
LT
3020 return err;
3021}
3022
e0a1ad73
TG
3023int ip6_del_rt(struct rt6_info *rt)
3024{
4d1169c1 3025 struct nl_info info = {
d1918542 3026 .nl_net = dev_net(rt->dst.dev),
4d1169c1 3027 };
528c4ceb 3028 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
3029}
3030
0ae81335
DA
3031static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
3032{
3033 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 3034 struct net *net = info->nl_net;
16a16cd3 3035 struct sk_buff *skb = NULL;
0ae81335 3036 struct fib6_table *table;
e3330039 3037 int err = -ENOENT;
0ae81335 3038
e3330039
WC
3039 if (rt == net->ipv6.ip6_null_entry)
3040 goto out_put;
0ae81335 3041 table = rt->rt6i_table;
66f5d6ce 3042 spin_lock_bh(&table->tb6_lock);
0ae81335
DA
3043
3044 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
3045 struct rt6_info *sibling, *next_sibling;
3046
16a16cd3
DA
3047 /* prefer to send a single notification with all hops */
3048 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3049 if (skb) {
3050 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3051
e3330039 3052 if (rt6_fill_node(net, skb, rt,
16a16cd3
DA
3053 NULL, NULL, 0, RTM_DELROUTE,
3054 info->portid, seq, 0) < 0) {
3055 kfree_skb(skb);
3056 skb = NULL;
3057 } else
3058 info->skip_notify = 1;
3059 }
3060
0ae81335
DA
3061 list_for_each_entry_safe(sibling, next_sibling,
3062 &rt->rt6i_siblings,
3063 rt6i_siblings) {
3064 err = fib6_del(sibling, info);
3065 if (err)
e3330039 3066 goto out_unlock;
0ae81335
DA
3067 }
3068 }
3069
3070 err = fib6_del(rt, info);
e3330039 3071out_unlock:
66f5d6ce 3072 spin_unlock_bh(&table->tb6_lock);
e3330039 3073out_put:
0ae81335 3074 ip6_rt_put(rt);
16a16cd3
DA
3075
3076 if (skb) {
e3330039 3077 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
3078 info->nlh, gfp_any());
3079 }
0ae81335
DA
3080 return err;
3081}
3082
333c4301
DA
3083static int ip6_route_del(struct fib6_config *cfg,
3084 struct netlink_ext_ack *extack)
1da177e4 3085{
2b760fcf 3086 struct rt6_info *rt, *rt_cache;
c71099ac 3087 struct fib6_table *table;
1da177e4 3088 struct fib6_node *fn;
1da177e4
LT
3089 int err = -ESRCH;
3090
5578689a 3091 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
3092 if (!table) {
3093 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 3094 return err;
d5d531cb 3095 }
c71099ac 3096
66f5d6ce 3097 rcu_read_lock();
1da177e4 3098
c71099ac 3099 fn = fib6_locate(&table->tb6_root,
86872cb5 3100 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 3101 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 3102 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 3103
1da177e4 3104 if (fn) {
66f5d6ce 3105 for_each_fib6_node_rt_rcu(fn) {
2b760fcf
WW
3106 if (cfg->fc_flags & RTF_CACHE) {
3107 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3108 &cfg->fc_src);
3109 if (!rt_cache)
3110 continue;
3111 rt = rt_cache;
3112 }
86872cb5 3113 if (cfg->fc_ifindex &&
d1918542
DM
3114 (!rt->dst.dev ||
3115 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 3116 continue;
86872cb5
TG
3117 if (cfg->fc_flags & RTF_GATEWAY &&
3118 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 3119 continue;
86872cb5 3120 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 3121 continue;
c2ed1880
M
3122 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
3123 continue;
d3843fe5
WW
3124 if (!dst_hold_safe(&rt->dst))
3125 break;
66f5d6ce 3126 rcu_read_unlock();
1da177e4 3127
0ae81335
DA
3128 /* if gateway was specified only delete the one hop */
3129 if (cfg->fc_flags & RTF_GATEWAY)
3130 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3131
3132 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3133 }
3134 }
66f5d6ce 3135 rcu_read_unlock();
1da177e4
LT
3136
3137 return err;
3138}
3139
6700c270 3140static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 3141{
a6279458 3142 struct netevent_redirect netevent;
e8599ff4 3143 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
3144 struct ndisc_options ndopts;
3145 struct inet6_dev *in6_dev;
3146 struct neighbour *neigh;
71bcdba0 3147 struct rd_msg *msg;
6e157b6a
DM
3148 int optlen, on_link;
3149 u8 *lladdr;
e8599ff4 3150
29a3cad5 3151 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 3152 optlen -= sizeof(*msg);
e8599ff4
DM
3153
3154 if (optlen < 0) {
6e157b6a 3155 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
3156 return;
3157 }
3158
71bcdba0 3159 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 3160
71bcdba0 3161 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 3162 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
3163 return;
3164 }
3165
6e157b6a 3166 on_link = 0;
71bcdba0 3167 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 3168 on_link = 1;
71bcdba0 3169 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 3170 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 3171 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
3172 return;
3173 }
3174
3175 in6_dev = __in6_dev_get(skb->dev);
3176 if (!in6_dev)
3177 return;
3178 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3179 return;
3180
3181 /* RFC2461 8.1:
3182 * The IP source address of the Redirect MUST be the same as the current
3183 * first-hop router for the specified ICMP Destination Address.
3184 */
3185
f997c55c 3186 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
3187 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3188 return;
3189 }
6e157b6a
DM
3190
3191 lladdr = NULL;
e8599ff4
DM
3192 if (ndopts.nd_opts_tgt_lladdr) {
3193 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3194 skb->dev);
3195 if (!lladdr) {
3196 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3197 return;
3198 }
3199 }
3200
6e157b6a 3201 rt = (struct rt6_info *) dst;
ec13ad1d 3202 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 3203 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 3204 return;
6e157b6a 3205 }
e8599ff4 3206
6e157b6a
DM
3207 /* Redirect received -> path was valid.
3208 * Look, redirects are sent only in response to data packets,
3209 * so that this nexthop apparently is reachable. --ANK
3210 */
0dec879f 3211 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 3212
71bcdba0 3213 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
3214 if (!neigh)
3215 return;
a6279458 3216
1da177e4
LT
3217 /*
3218 * We have finally decided to accept it.
3219 */
3220
f997c55c 3221 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3222 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3223 NEIGH_UPDATE_F_OVERRIDE|
3224 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3225 NEIGH_UPDATE_F_ISROUTER)),
3226 NDISC_REDIRECT, &ndopts);
1da177e4 3227
83a09abd 3228 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 3229 if (!nrt)
1da177e4
LT
3230 goto out;
3231
3232 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3233 if (on_link)
3234 nrt->rt6i_flags &= ~RTF_GATEWAY;
3235
b91d5329 3236 nrt->rt6i_protocol = RTPROT_REDIRECT;
4e3fd7a0 3237 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3238
2b760fcf
WW
3239 /* No need to remove rt from the exception table if rt is
3240 * a cached route because rt6_insert_exception() will
3241 * takes care of it
3242 */
3243 if (rt6_insert_exception(nrt, rt)) {
3244 dst_release_immediate(&nrt->dst);
3245 goto out;
3246 }
1da177e4 3247
d8d1f30b
CG
3248 netevent.old = &rt->dst;
3249 netevent.new = &nrt->dst;
71bcdba0 3250 netevent.daddr = &msg->dest;
60592833 3251 netevent.neigh = neigh;
8d71740c
TT
3252 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3253
1da177e4 3254out:
e8599ff4 3255 neigh_release(neigh);
6e157b6a
DM
3256}
3257
1da177e4
LT
3258/*
3259 * Misc support functions
3260 */
3261
4b32b5ad
MKL
3262static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
3263{
3a2232e9 3264 BUG_ON(from->from);
4b32b5ad
MKL
3265
3266 rt->rt6i_flags &= ~RTF_EXPIRES;
3267 dst_hold(&from->dst);
3a2232e9 3268 rt->from = from;
4b32b5ad
MKL
3269 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
3270}
3271
83a09abd
MKL
3272static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
3273{
3274 rt->dst.input = ort->dst.input;
3275 rt->dst.output = ort->dst.output;
3276 rt->rt6i_dst = ort->rt6i_dst;
3277 rt->dst.error = ort->dst.error;
3278 rt->rt6i_idev = ort->rt6i_idev;
3279 if (rt->rt6i_idev)
3280 in6_dev_hold(rt->rt6i_idev);
3281 rt->dst.lastuse = jiffies;
3282 rt->rt6i_gateway = ort->rt6i_gateway;
3283 rt->rt6i_flags = ort->rt6i_flags;
3284 rt6_set_from(rt, ort);
3285 rt->rt6i_metric = ort->rt6i_metric;
1da177e4 3286#ifdef CONFIG_IPV6_SUBTREES
83a09abd 3287 rt->rt6i_src = ort->rt6i_src;
1da177e4 3288#endif
83a09abd
MKL
3289 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
3290 rt->rt6i_table = ort->rt6i_table;
61adedf3 3291 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
1da177e4
LT
3292}
3293
70ceb4f5 3294#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 3295static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 3296 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3297 const struct in6_addr *gwaddr,
3298 struct net_device *dev)
70ceb4f5 3299{
830218c1
DA
3300 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3301 int ifindex = dev->ifindex;
70ceb4f5
YH
3302 struct fib6_node *fn;
3303 struct rt6_info *rt = NULL;
c71099ac
TG
3304 struct fib6_table *table;
3305
830218c1 3306 table = fib6_get_table(net, tb_id);
38308473 3307 if (!table)
c71099ac 3308 return NULL;
70ceb4f5 3309
66f5d6ce 3310 rcu_read_lock();
38fbeeee 3311 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3312 if (!fn)
3313 goto out;
3314
66f5d6ce 3315 for_each_fib6_node_rt_rcu(fn) {
d1918542 3316 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
3317 continue;
3318 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
3319 continue;
3320 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
3321 continue;
d3843fe5 3322 ip6_hold_safe(NULL, &rt, false);
70ceb4f5
YH
3323 break;
3324 }
3325out:
66f5d6ce 3326 rcu_read_unlock();
70ceb4f5
YH
3327 return rt;
3328}
3329
efa2cea0 3330static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 3331 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3332 const struct in6_addr *gwaddr,
3333 struct net_device *dev,
95c96174 3334 unsigned int pref)
70ceb4f5 3335{
86872cb5 3336 struct fib6_config cfg = {
238fc7ea 3337 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3338 .fc_ifindex = dev->ifindex,
86872cb5
TG
3339 .fc_dst_len = prefixlen,
3340 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3341 RTF_UP | RTF_PREF(pref),
b91d5329 3342 .fc_protocol = RTPROT_RA,
15e47304 3343 .fc_nlinfo.portid = 0,
efa2cea0
DL
3344 .fc_nlinfo.nlh = NULL,
3345 .fc_nlinfo.nl_net = net,
86872cb5
TG
3346 };
3347
830218c1 3348 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3349 cfg.fc_dst = *prefix;
3350 cfg.fc_gateway = *gwaddr;
70ceb4f5 3351
e317da96
YH
3352 /* We should treat it as a default route if prefix length is 0. */
3353 if (!prefixlen)
86872cb5 3354 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3355
333c4301 3356 ip6_route_add(&cfg, NULL);
70ceb4f5 3357
830218c1 3358 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3359}
3360#endif
3361
b71d1d42 3362struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 3363{
830218c1 3364 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
1da177e4 3365 struct rt6_info *rt;
c71099ac 3366 struct fib6_table *table;
1da177e4 3367
830218c1 3368 table = fib6_get_table(dev_net(dev), tb_id);
38308473 3369 if (!table)
c71099ac 3370 return NULL;
1da177e4 3371
66f5d6ce
WW
3372 rcu_read_lock();
3373 for_each_fib6_node_rt_rcu(&table->tb6_root) {
d1918542 3374 if (dev == rt->dst.dev &&
045927ff 3375 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
3376 ipv6_addr_equal(&rt->rt6i_gateway, addr))
3377 break;
3378 }
3379 if (rt)
d3843fe5 3380 ip6_hold_safe(NULL, &rt, false);
66f5d6ce 3381 rcu_read_unlock();
1da177e4
LT
3382 return rt;
3383}
3384
b71d1d42 3385struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
3386 struct net_device *dev,
3387 unsigned int pref)
1da177e4 3388{
86872cb5 3389 struct fib6_config cfg = {
ca254490 3390 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3391 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3392 .fc_ifindex = dev->ifindex,
3393 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3394 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3395 .fc_protocol = RTPROT_RA,
15e47304 3396 .fc_nlinfo.portid = 0,
5578689a 3397 .fc_nlinfo.nlh = NULL,
c346dca1 3398 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 3399 };
1da177e4 3400
4e3fd7a0 3401 cfg.fc_gateway = *gwaddr;
1da177e4 3402
333c4301 3403 if (!ip6_route_add(&cfg, NULL)) {
830218c1
DA
3404 struct fib6_table *table;
3405
3406 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3407 if (table)
3408 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3409 }
1da177e4 3410
1da177e4
LT
3411 return rt6_get_dflt_router(gwaddr, dev);
3412}
3413
830218c1 3414static void __rt6_purge_dflt_routers(struct fib6_table *table)
1da177e4
LT
3415{
3416 struct rt6_info *rt;
3417
3418restart:
66f5d6ce
WW
3419 rcu_read_lock();
3420 for_each_fib6_node_rt_rcu(&table->tb6_root) {
3e8b0ac3
LC
3421 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3422 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d3843fe5 3423 if (dst_hold_safe(&rt->dst)) {
66f5d6ce 3424 rcu_read_unlock();
d3843fe5
WW
3425 ip6_del_rt(rt);
3426 } else {
66f5d6ce 3427 rcu_read_unlock();
d3843fe5 3428 }
1da177e4
LT
3429 goto restart;
3430 }
3431 }
66f5d6ce 3432 rcu_read_unlock();
830218c1
DA
3433
3434 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3435}
3436
3437void rt6_purge_dflt_routers(struct net *net)
3438{
3439 struct fib6_table *table;
3440 struct hlist_head *head;
3441 unsigned int h;
3442
3443 rcu_read_lock();
3444
3445 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3446 head = &net->ipv6.fib_table_hash[h];
3447 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3448 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3449 __rt6_purge_dflt_routers(table);
3450 }
3451 }
3452
3453 rcu_read_unlock();
1da177e4
LT
3454}
3455
5578689a
DL
3456static void rtmsg_to_fib6_config(struct net *net,
3457 struct in6_rtmsg *rtmsg,
86872cb5
TG
3458 struct fib6_config *cfg)
3459{
3460 memset(cfg, 0, sizeof(*cfg));
3461
ca254490
DA
3462 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3463 : RT6_TABLE_MAIN;
86872cb5
TG
3464 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3465 cfg->fc_metric = rtmsg->rtmsg_metric;
3466 cfg->fc_expires = rtmsg->rtmsg_info;
3467 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3468 cfg->fc_src_len = rtmsg->rtmsg_src_len;
3469 cfg->fc_flags = rtmsg->rtmsg_flags;
3470
5578689a 3471 cfg->fc_nlinfo.nl_net = net;
f1243c2d 3472
4e3fd7a0
AD
3473 cfg->fc_dst = rtmsg->rtmsg_dst;
3474 cfg->fc_src = rtmsg->rtmsg_src;
3475 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
3476}
3477
5578689a 3478int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3479{
86872cb5 3480 struct fib6_config cfg;
1da177e4
LT
3481 struct in6_rtmsg rtmsg;
3482 int err;
3483
67ba4152 3484 switch (cmd) {
1da177e4
LT
3485 case SIOCADDRT: /* Add a route */
3486 case SIOCDELRT: /* Delete a route */
af31f412 3487 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3488 return -EPERM;
3489 err = copy_from_user(&rtmsg, arg,
3490 sizeof(struct in6_rtmsg));
3491 if (err)
3492 return -EFAULT;
86872cb5 3493
5578689a 3494 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3495
1da177e4
LT
3496 rtnl_lock();
3497 switch (cmd) {
3498 case SIOCADDRT:
333c4301 3499 err = ip6_route_add(&cfg, NULL);
1da177e4
LT
3500 break;
3501 case SIOCDELRT:
333c4301 3502 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3503 break;
3504 default:
3505 err = -EINVAL;
3506 }
3507 rtnl_unlock();
3508
3509 return err;
3ff50b79 3510 }
1da177e4
LT
3511
3512 return -EINVAL;
3513}
3514
3515/*
3516 * Drop the packet on the floor
3517 */
3518
d5fdd6ba 3519static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3520{
612f09e8 3521 int type;
adf30907 3522 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3523 switch (ipstats_mib_noroutes) {
3524 case IPSTATS_MIB_INNOROUTES:
0660e03f 3525 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3526 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
3527 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3528 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3529 break;
3530 }
3531 /* FALLTHROUGH */
3532 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3533 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3534 ipstats_mib_noroutes);
612f09e8
YH
3535 break;
3536 }
3ffe533c 3537 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3538 kfree_skb(skb);
3539 return 0;
3540}
3541
9ce8ade0
TG
3542static int ip6_pkt_discard(struct sk_buff *skb)
3543{
612f09e8 3544 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3545}
3546
ede2059d 3547static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3548{
adf30907 3549 skb->dev = skb_dst(skb)->dev;
612f09e8 3550 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3551}
3552
9ce8ade0
TG
3553static int ip6_pkt_prohibit(struct sk_buff *skb)
3554{
612f09e8 3555 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3556}
3557
ede2059d 3558static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3559{
adf30907 3560 skb->dev = skb_dst(skb)->dev;
612f09e8 3561 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3562}
3563
1da177e4
LT
3564/*
3565 * Allocate a dst for local (unicast / anycast) address.
3566 */
3567
3568struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
3569 const struct in6_addr *addr,
8f031519 3570 bool anycast)
1da177e4 3571{
ca254490 3572 u32 tb_id;
c346dca1 3573 struct net *net = dev_net(idev->dev);
4832c30d 3574 struct net_device *dev = idev->dev;
5f02ce24
DA
3575 struct rt6_info *rt;
3576
5f02ce24 3577 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
a3300ef4 3578 if (!rt)
1da177e4
LT
3579 return ERR_PTR(-ENOMEM);
3580
1da177e4
LT
3581 in6_dev_hold(idev);
3582
11d53b49 3583 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
3584 rt->dst.input = ip6_input;
3585 rt->dst.output = ip6_output;
1da177e4 3586 rt->rt6i_idev = idev;
1da177e4 3587
94b5e0f9 3588 rt->rt6i_protocol = RTPROT_KERNEL;
1da177e4 3589 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
3590 if (anycast)
3591 rt->rt6i_flags |= RTF_ANYCAST;
3592 else
1da177e4 3593 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 3594
550bab42 3595 rt->rt6i_gateway = *addr;
4e3fd7a0 3596 rt->rt6i_dst.addr = *addr;
1da177e4 3597 rt->rt6i_dst.plen = 128;
ca254490
DA
3598 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3599 rt->rt6i_table = fib6_get_table(net, tb_id);
1da177e4 3600
1da177e4
LT
3601 return rt;
3602}
3603
c3968a85
DW
/* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;	/* restrict to this device; NULL matches any */
	struct net *net;	/* namespace, used to recognise the null entry */
	struct in6_addr *addr;	/* address being removed */
};
3610
3611static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
3612{
3613 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3614 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3615 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3616
d1918542 3617 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
3618 rt != net->ipv6.ip6_null_entry &&
3619 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
60006a48 3620 spin_lock_bh(&rt6_exception_lock);
c3968a85
DW
3621 /* remove prefsrc entry */
3622 rt->rt6i_prefsrc.plen = 0;
60006a48
WW
3623 /* need to update cache as well */
3624 rt6_exceptions_remove_prefsrc(rt);
3625 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3626 }
3627 return 0;
3628}
3629
3630void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3631{
3632 struct net *net = dev_net(ifp->idev->dev);
3633 struct arg_dev_net_ip adni = {
3634 .dev = ifp->idev->dev,
3635 .net = net,
3636 .addr = &ifp->addr,
3637 };
0c3584d5 3638 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3639}
3640
be7a010d 3641#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
be7a010d
DJ
3642
3643/* Remove routers and update dst entries when gateway turn into host. */
3644static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
3645{
3646 struct in6_addr *gateway = (struct in6_addr *)arg;
3647
2b760fcf
WW
3648 if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
3649 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
be7a010d
DJ
3650 return -1;
3651 }
b16cb459
WW
3652
3653 /* Further clean up cached routes in exception table.
3654 * This is needed because cached route may have a different
3655 * gateway than its 'parent' in the case of an ip redirect.
3656 */
3657 rt6_exceptions_clean_tohost(rt, gateway);
3658
be7a010d
DJ
3659 return 0;
3660}
3661
3662void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3663{
3664 fib6_clean_all(net, fib6_clean_tohost, gateway);
3665}
3666
2127d95a
IS
/*
 * Argument bundle for the fib6_ifup() / fib6_ifdown() walkers.  The two
 * union members are alternatives: fib6_ifup() reads nh_flags,
 * fib6_ifdown() reads event.
 */
struct arg_netdev_event {
	const struct net_device *dev;
	union {
		unsigned int nh_flags;	/* nexthop flags to clear on "up" */
		unsigned long event;	/* NETDEV_* event on "down" */
	};
};
3674
d7dedee1
IS
3675static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
3676{
3677 struct rt6_info *iter;
3678 struct fib6_node *fn;
3679
3680 fn = rcu_dereference_protected(rt->rt6i_node,
3681 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3682 iter = rcu_dereference_protected(fn->leaf,
3683 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3684 while (iter) {
3685 if (iter->rt6i_metric == rt->rt6i_metric &&
3686 rt6_qualify_for_ecmp(iter))
3687 return iter;
3688 iter = rcu_dereference_protected(iter->rt6_next,
3689 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3690 }
3691
3692 return NULL;
3693}
3694
3695static bool rt6_is_dead(const struct rt6_info *rt)
3696{
3697 if (rt->rt6i_nh_flags & RTNH_F_DEAD ||
3698 (rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
3699 rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
3700 return true;
3701
3702 return false;
3703}
3704
3705static int rt6_multipath_total_weight(const struct rt6_info *rt)
3706{
3707 struct rt6_info *iter;
3708 int total = 0;
3709
3710 if (!rt6_is_dead(rt))
398958ae 3711 total += rt->rt6i_nh_weight;
d7dedee1
IS
3712
3713 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
3714 if (!rt6_is_dead(iter))
398958ae 3715 total += iter->rt6i_nh_weight;
d7dedee1
IS
3716 }
3717
3718 return total;
3719}
3720
3721static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
3722{
3723 int upper_bound = -1;
3724
3725 if (!rt6_is_dead(rt)) {
398958ae 3726 *weight += rt->rt6i_nh_weight;
d7dedee1
IS
3727 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3728 total) - 1;
3729 }
3730 atomic_set(&rt->rt6i_nh_upper_bound, upper_bound);
3731}
3732
3733static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
3734{
3735 struct rt6_info *iter;
3736 int weight = 0;
3737
3738 rt6_upper_bound_set(rt, &weight, total);
3739
3740 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3741 rt6_upper_bound_set(iter, &weight, total);
3742}
3743
3744void rt6_multipath_rebalance(struct rt6_info *rt)
3745{
3746 struct rt6_info *first;
3747 int total;
3748
3749 /* In case the entire multipath route was marked for flushing,
3750 * then there is no need to rebalance upon the removal of every
3751 * sibling route.
3752 */
3753 if (!rt->rt6i_nsiblings || rt->should_flush)
3754 return;
3755
3756 /* During lookup routes are evaluated in order, so we need to
3757 * make sure upper bounds are assigned from the first sibling
3758 * onwards.
3759 */
3760 first = rt6_multipath_first_sibling(rt);
3761 if (WARN_ON_ONCE(!first))
3762 return;
3763
3764 total = rt6_multipath_total_weight(first);
3765 rt6_multipath_upper_bound_set(first, total);
3766}
3767
2127d95a
IS
3768static int fib6_ifup(struct rt6_info *rt, void *p_arg)
3769{
3770 const struct arg_netdev_event *arg = p_arg;
3771 const struct net *net = dev_net(arg->dev);
3772
1de178ed 3773 if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) {
2127d95a 3774 rt->rt6i_nh_flags &= ~arg->nh_flags;
1de178ed 3775 fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt);
d7dedee1 3776 rt6_multipath_rebalance(rt);
1de178ed 3777 }
2127d95a
IS
3778
3779 return 0;
3780}
3781
3782void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3783{
3784 struct arg_netdev_event arg = {
3785 .dev = dev,
6802f3ad
IS
3786 {
3787 .nh_flags = nh_flags,
3788 },
2127d95a
IS
3789 };
3790
3791 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3792 arg.nh_flags |= RTNH_F_LINKDOWN;
3793
3794 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3795}
3796
1de178ed
IS
3797static bool rt6_multipath_uses_dev(const struct rt6_info *rt,
3798 const struct net_device *dev)
3799{
3800 struct rt6_info *iter;
3801
3802 if (rt->dst.dev == dev)
3803 return true;
3804 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3805 if (iter->dst.dev == dev)
3806 return true;
3807
3808 return false;
3809}
3810
3811static void rt6_multipath_flush(struct rt6_info *rt)
3812{
3813 struct rt6_info *iter;
3814
3815 rt->should_flush = 1;
3816 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3817 iter->should_flush = 1;
3818}
3819
3820static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
3821 const struct net_device *down_dev)
3822{
3823 struct rt6_info *iter;
3824 unsigned int dead = 0;
3825
3826 if (rt->dst.dev == down_dev || rt->rt6i_nh_flags & RTNH_F_DEAD)
3827 dead++;
3828 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3829 if (iter->dst.dev == down_dev ||
3830 iter->rt6i_nh_flags & RTNH_F_DEAD)
3831 dead++;
3832
3833 return dead;
3834}
3835
3836static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
3837 const struct net_device *dev,
3838 unsigned int nh_flags)
3839{
3840 struct rt6_info *iter;
3841
3842 if (rt->dst.dev == dev)
3843 rt->rt6i_nh_flags |= nh_flags;
3844 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3845 if (iter->dst.dev == dev)
3846 iter->rt6i_nh_flags |= nh_flags;
3847}
3848
a1a22c12 3849/* called with write lock held for table with rt */
4c981e28 3850static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
1da177e4 3851{
4c981e28
IS
3852 const struct arg_netdev_event *arg = p_arg;
3853 const struct net_device *dev = arg->dev;
3854 const struct net *net = dev_net(dev);
8ed67789 3855
1de178ed 3856 if (rt == net->ipv6.ip6_null_entry)
27c6fa73
IS
3857 return 0;
3858
3859 switch (arg->event) {
3860 case NETDEV_UNREGISTER:
1de178ed 3861 return rt->dst.dev == dev ? -1 : 0;
27c6fa73 3862 case NETDEV_DOWN:
1de178ed 3863 if (rt->should_flush)
27c6fa73 3864 return -1;
1de178ed
IS
3865 if (!rt->rt6i_nsiblings)
3866 return rt->dst.dev == dev ? -1 : 0;
3867 if (rt6_multipath_uses_dev(rt, dev)) {
3868 unsigned int count;
3869
3870 count = rt6_multipath_dead_count(rt, dev);
3871 if (rt->rt6i_nsiblings + 1 == count) {
3872 rt6_multipath_flush(rt);
3873 return -1;
3874 }
3875 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
3876 RTNH_F_LINKDOWN);
3877 fib6_update_sernum(rt);
d7dedee1 3878 rt6_multipath_rebalance(rt);
1de178ed
IS
3879 }
3880 return -2;
27c6fa73 3881 case NETDEV_CHANGE:
1de178ed
IS
3882 if (rt->dst.dev != dev ||
3883 rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73
IS
3884 break;
3885 rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 3886 rt6_multipath_rebalance(rt);
27c6fa73 3887 break;
2b241361 3888 }
c159d30c 3889
1da177e4
LT
3890 return 0;
3891}
3892
27c6fa73 3893void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 3894{
4c981e28 3895 struct arg_netdev_event arg = {
8ed67789 3896 .dev = dev,
6802f3ad
IS
3897 {
3898 .event = event,
3899 },
8ed67789
DL
3900 };
3901
4c981e28
IS
3902 fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
3903}
3904
3905void rt6_disable_ip(struct net_device *dev, unsigned long event)
3906{
3907 rt6_sync_down_dev(dev, event);
3908 rt6_uncached_list_flush_dev(dev_net(dev), dev);
3909 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
3910}
3911
/* Argument bundle for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
3916
3917static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
3918{
3919 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
3920 struct inet6_dev *idev;
3921
3922 /* In IPv6 pmtu discovery is not optional,
3923 so that RTAX_MTU lock cannot disable it.
3924 We still use this lock to block changes
3925 caused by addrconf/ndisc.
3926 */
3927
3928 idev = __in6_dev_get(arg->dev);
38308473 3929 if (!idev)
1da177e4
LT
3930 return 0;
3931
3932 /* For administrative MTU increase, there is no way to discover
3933 IPv6 PMTU increase, so PMTU increase should be updated here.
3934 Since RFC 1981 doesn't include administrative MTU increase
3935 update PMTU increase is a MUST. (i.e. jumbo frame)
3936 */
d1918542 3937 if (rt->dst.dev == arg->dev &&
4b32b5ad 3938 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
f5bbe7ee 3939 spin_lock_bh(&rt6_exception_lock);
e9fa1495
SB
3940 if (dst_metric_raw(&rt->dst, RTAX_MTU) &&
3941 rt6_mtu_change_route_allowed(idev, rt, arg->mtu))
4b32b5ad 3942 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
e9fa1495 3943 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
f5bbe7ee 3944 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 3945 }
1da177e4
LT
3946 return 0;
3947}
3948
95c96174 3949void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 3950{
c71099ac
TG
3951 struct rt6_mtu_change_arg arg = {
3952 .dev = dev,
3953 .mtu = mtu,
3954 };
1da177e4 3955
0c3584d5 3956 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
3957}
3958
ef7c79ed 3959static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 3960 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 3961 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 3962 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
3963 [RTA_PRIORITY] = { .type = NLA_U32 },
3964 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 3965 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 3966 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
3967 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
3968 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 3969 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 3970 [RTA_UID] = { .type = NLA_U32 },
3b45a410 3971 [RTA_MARK] = { .type = NLA_U32 },
86872cb5
TG
3972};
3973
3974static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
3975 struct fib6_config *cfg,
3976 struct netlink_ext_ack *extack)
1da177e4 3977{
86872cb5
TG
3978 struct rtmsg *rtm;
3979 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 3980 unsigned int pref;
86872cb5 3981 int err;
1da177e4 3982
fceb6435
JB
3983 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
3984 NULL);
86872cb5
TG
3985 if (err < 0)
3986 goto errout;
1da177e4 3987
86872cb5
TG
3988 err = -EINVAL;
3989 rtm = nlmsg_data(nlh);
3990 memset(cfg, 0, sizeof(*cfg));
3991
3992 cfg->fc_table = rtm->rtm_table;
3993 cfg->fc_dst_len = rtm->rtm_dst_len;
3994 cfg->fc_src_len = rtm->rtm_src_len;
3995 cfg->fc_flags = RTF_UP;
3996 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 3997 cfg->fc_type = rtm->rtm_type;
86872cb5 3998
ef2c7d7b
ND
3999 if (rtm->rtm_type == RTN_UNREACHABLE ||
4000 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
4001 rtm->rtm_type == RTN_PROHIBIT ||
4002 rtm->rtm_type == RTN_THROW)
86872cb5
TG
4003 cfg->fc_flags |= RTF_REJECT;
4004
ab79ad14
4005 if (rtm->rtm_type == RTN_LOCAL)
4006 cfg->fc_flags |= RTF_LOCAL;
4007
1f56a01f
MKL
4008 if (rtm->rtm_flags & RTM_F_CLONED)
4009 cfg->fc_flags |= RTF_CACHE;
4010
fc1e64e1
DA
4011 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4012
15e47304 4013 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 4014 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 4015 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
4016
4017 if (tb[RTA_GATEWAY]) {
67b61f6c 4018 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 4019 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 4020 }
86872cb5
TG
4021
4022 if (tb[RTA_DST]) {
4023 int plen = (rtm->rtm_dst_len + 7) >> 3;
4024
4025 if (nla_len(tb[RTA_DST]) < plen)
4026 goto errout;
4027
4028 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 4029 }
86872cb5
TG
4030
4031 if (tb[RTA_SRC]) {
4032 int plen = (rtm->rtm_src_len + 7) >> 3;
4033
4034 if (nla_len(tb[RTA_SRC]) < plen)
4035 goto errout;
4036
4037 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 4038 }
86872cb5 4039
c3968a85 4040 if (tb[RTA_PREFSRC])
67b61f6c 4041 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 4042
86872cb5
TG
4043 if (tb[RTA_OIF])
4044 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4045
4046 if (tb[RTA_PRIORITY])
4047 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4048
4049 if (tb[RTA_METRICS]) {
4050 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4051 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 4052 }
86872cb5
TG
4053
4054 if (tb[RTA_TABLE])
4055 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4056
51ebd318
ND
4057 if (tb[RTA_MULTIPATH]) {
4058 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4059 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
4060
4061 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 4062 cfg->fc_mp_len, extack);
9ed59592
DA
4063 if (err < 0)
4064 goto errout;
51ebd318
ND
4065 }
4066
c78ba6d6
LR
4067 if (tb[RTA_PREF]) {
4068 pref = nla_get_u8(tb[RTA_PREF]);
4069 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4070 pref != ICMPV6_ROUTER_PREF_HIGH)
4071 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4072 cfg->fc_flags |= RTF_PREF(pref);
4073 }
4074
19e42e45
RP
4075 if (tb[RTA_ENCAP])
4076 cfg->fc_encap = tb[RTA_ENCAP];
4077
9ed59592 4078 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
4079 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4080
c255bd68 4081 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
4082 if (err < 0)
4083 goto errout;
4084 }
4085
32bc201e
XL
4086 if (tb[RTA_EXPIRES]) {
4087 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4088
4089 if (addrconf_finite_timeout(timeout)) {
4090 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4091 cfg->fc_flags |= RTF_EXPIRES;
4092 }
4093 }
4094
86872cb5
TG
4095 err = 0;
4096errout:
4097 return err;
1da177e4
LT
4098}
4099
6b9ea5a6
RP
4100struct rt6_nh {
4101 struct rt6_info *rt6_info;
4102 struct fib6_config r_cfg;
4103 struct mx6_config mxc;
4104 struct list_head next;
4105};
4106
4107static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4108{
4109 struct rt6_nh *nh;
4110
4111 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 4112 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
4113 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4114 nh->r_cfg.fc_ifindex);
4115 }
4116}
4117
4118static int ip6_route_info_append(struct list_head *rt6_nh_list,
4119 struct rt6_info *rt, struct fib6_config *r_cfg)
4120{
4121 struct rt6_nh *nh;
6b9ea5a6
RP
4122 int err = -EEXIST;
4123
4124 list_for_each_entry(nh, rt6_nh_list, next) {
4125 /* check if rt6_info already exists */
f06b7549 4126 if (rt6_duplicate_nexthop(nh->rt6_info, rt))
6b9ea5a6
RP
4127 return err;
4128 }
4129
4130 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4131 if (!nh)
4132 return -ENOMEM;
4133 nh->rt6_info = rt;
4134 err = ip6_convert_metrics(&nh->mxc, r_cfg);
4135 if (err) {
4136 kfree(nh);
4137 return err;
4138 }
4139 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4140 list_add_tail(&nh->next, rt6_nh_list);
4141
4142 return 0;
4143}
4144
3b1137fe
DA
4145static void ip6_route_mpath_notify(struct rt6_info *rt,
4146 struct rt6_info *rt_last,
4147 struct nl_info *info,
4148 __u16 nlflags)
4149{
4150 /* if this is an APPEND route, then rt points to the first route
4151 * inserted and rt_last points to last route inserted. Userspace
4152 * wants a consistent dump of the route which starts at the first
4153 * nexthop. Since sibling routes are always added at the end of
4154 * the list, find the first sibling of the last route appended
4155 */
4156 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
4157 rt = list_first_entry(&rt_last->rt6i_siblings,
4158 struct rt6_info,
4159 rt6i_siblings);
4160 }
4161
4162 if (rt)
4163 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4164}
4165
333c4301
DA
4166static int ip6_route_multipath_add(struct fib6_config *cfg,
4167 struct netlink_ext_ack *extack)
51ebd318 4168{
3b1137fe
DA
4169 struct rt6_info *rt_notif = NULL, *rt_last = NULL;
4170 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4171 struct fib6_config r_cfg;
4172 struct rtnexthop *rtnh;
6b9ea5a6
RP
4173 struct rt6_info *rt;
4174 struct rt6_nh *err_nh;
4175 struct rt6_nh *nh, *nh_safe;
3b1137fe 4176 __u16 nlflags;
51ebd318
ND
4177 int remaining;
4178 int attrlen;
6b9ea5a6
RP
4179 int err = 1;
4180 int nhn = 0;
4181 int replace = (cfg->fc_nlinfo.nlh &&
4182 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4183 LIST_HEAD(rt6_nh_list);
51ebd318 4184
3b1137fe
DA
4185 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4186 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4187 nlflags |= NLM_F_APPEND;
4188
35f1b4e9 4189 remaining = cfg->fc_mp_len;
51ebd318 4190 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4191
6b9ea5a6
RP
4192 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
4193 * rt6_info structs per nexthop
4194 */
51ebd318
ND
4195 while (rtnh_ok(rtnh, remaining)) {
4196 memcpy(&r_cfg, cfg, sizeof(*cfg));
4197 if (rtnh->rtnh_ifindex)
4198 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4199
4200 attrlen = rtnh_attrlen(rtnh);
4201 if (attrlen > 0) {
4202 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4203
4204 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4205 if (nla) {
67b61f6c 4206 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4207 r_cfg.fc_flags |= RTF_GATEWAY;
4208 }
19e42e45
RP
4209 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4210 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4211 if (nla)
4212 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4213 }
6b9ea5a6 4214
68e2ffde 4215 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
333c4301 4216 rt = ip6_route_info_create(&r_cfg, extack);
8c5b83f0
RP
4217 if (IS_ERR(rt)) {
4218 err = PTR_ERR(rt);
4219 rt = NULL;
6b9ea5a6 4220 goto cleanup;
8c5b83f0 4221 }
6b9ea5a6 4222
398958ae
IS
4223 rt->rt6i_nh_weight = rtnh->rtnh_hops + 1;
4224
6b9ea5a6 4225 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
51ebd318 4226 if (err) {
587fea74 4227 dst_release_immediate(&rt->dst);
6b9ea5a6
RP
4228 goto cleanup;
4229 }
4230
4231 rtnh = rtnh_next(rtnh, &remaining);
4232 }
4233
3b1137fe
DA
4234 /* for add and replace send one notification with all nexthops.
4235 * Skip the notification in fib6_add_rt2node and send one with
4236 * the full route when done
4237 */
4238 info->skip_notify = 1;
4239
6b9ea5a6
RP
4240 err_nh = NULL;
4241 list_for_each_entry(nh, &rt6_nh_list, next) {
3b1137fe 4242 rt_last = nh->rt6_info;
333c4301 4243 err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
3b1137fe
DA
4244 /* save reference to first route for notification */
4245 if (!rt_notif && !err)
4246 rt_notif = nh->rt6_info;
4247
6b9ea5a6
RP
4248 /* nh->rt6_info is used or freed at this point, reset to NULL*/
4249 nh->rt6_info = NULL;
4250 if (err) {
4251 if (replace && nhn)
4252 ip6_print_replace_route_err(&rt6_nh_list);
4253 err_nh = nh;
4254 goto add_errout;
51ebd318 4255 }
6b9ea5a6 4256
1a72418b 4257 /* Because each route is added like a single route we remove
27596472
MK
4258 * these flags after the first nexthop: if there is a collision,
4259 * we have already failed to add the first nexthop:
4260 * fib6_add_rt2node() has rejected it; when replacing, old
4261 * nexthops have been replaced by first new, the rest should
4262 * be added to it.
1a72418b 4263 */
27596472
MK
4264 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4265 NLM_F_REPLACE);
6b9ea5a6
RP
4266 nhn++;
4267 }
4268
3b1137fe
DA
4269 /* success ... tell user about new route */
4270 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4271 goto cleanup;
4272
4273add_errout:
3b1137fe
DA
4274 /* send notification for routes that were added so that
4275 * the delete notifications sent by ip6_route_del are
4276 * coherent
4277 */
4278 if (rt_notif)
4279 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4280
6b9ea5a6
RP
4281 /* Delete routes that were already added */
4282 list_for_each_entry(nh, &rt6_nh_list, next) {
4283 if (err_nh == nh)
4284 break;
333c4301 4285 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4286 }
4287
4288cleanup:
4289 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
587fea74
WW
4290 if (nh->rt6_info)
4291 dst_release_immediate(&nh->rt6_info->dst);
52fe51f8 4292 kfree(nh->mxc.mx);
6b9ea5a6
RP
4293 list_del(&nh->next);
4294 kfree(nh);
4295 }
4296
4297 return err;
4298}
4299
333c4301
DA
4300static int ip6_route_multipath_del(struct fib6_config *cfg,
4301 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4302{
4303 struct fib6_config r_cfg;
4304 struct rtnexthop *rtnh;
4305 int remaining;
4306 int attrlen;
4307 int err = 1, last_err = 0;
4308
4309 remaining = cfg->fc_mp_len;
4310 rtnh = (struct rtnexthop *)cfg->fc_mp;
4311
4312 /* Parse a Multipath Entry */
4313 while (rtnh_ok(rtnh, remaining)) {
4314 memcpy(&r_cfg, cfg, sizeof(*cfg));
4315 if (rtnh->rtnh_ifindex)
4316 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4317
4318 attrlen = rtnh_attrlen(rtnh);
4319 if (attrlen > 0) {
4320 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4321
4322 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4323 if (nla) {
4324 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4325 r_cfg.fc_flags |= RTF_GATEWAY;
4326 }
4327 }
333c4301 4328 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4329 if (err)
4330 last_err = err;
4331
51ebd318
ND
4332 rtnh = rtnh_next(rtnh, &remaining);
4333 }
4334
4335 return last_err;
4336}
4337
c21ef3e3
DA
4338static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4339 struct netlink_ext_ack *extack)
1da177e4 4340{
86872cb5
TG
4341 struct fib6_config cfg;
4342 int err;
1da177e4 4343
333c4301 4344 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4345 if (err < 0)
4346 return err;
4347
51ebd318 4348 if (cfg.fc_mp)
333c4301 4349 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4350 else {
4351 cfg.fc_delete_all_nh = 1;
333c4301 4352 return ip6_route_del(&cfg, extack);
0ae81335 4353 }
1da177e4
LT
4354}
4355
c21ef3e3
DA
4356static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4357 struct netlink_ext_ack *extack)
1da177e4 4358{
86872cb5
TG
4359 struct fib6_config cfg;
4360 int err;
1da177e4 4361
333c4301 4362 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4363 if (err < 0)
4364 return err;
4365
51ebd318 4366 if (cfg.fc_mp)
333c4301 4367 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4368 else
333c4301 4369 return ip6_route_add(&cfg, extack);
1da177e4
LT
4370}
4371
beb1afac 4372static size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f 4373{
beb1afac
DA
4374 int nexthop_len = 0;
4375
4376 if (rt->rt6i_nsiblings) {
4377 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4378 + NLA_ALIGN(sizeof(struct rtnexthop))
4379 + nla_total_size(16) /* RTA_GATEWAY */
beb1afac
DA
4380 + lwtunnel_get_encap_size(rt->dst.lwtstate);
4381
4382 nexthop_len *= rt->rt6i_nsiblings;
4383 }
4384
339bf98f
TG
4385 return NLMSG_ALIGN(sizeof(struct rtmsg))
4386 + nla_total_size(16) /* RTA_SRC */
4387 + nla_total_size(16) /* RTA_DST */
4388 + nla_total_size(16) /* RTA_GATEWAY */
4389 + nla_total_size(16) /* RTA_PREFSRC */
4390 + nla_total_size(4) /* RTA_TABLE */
4391 + nla_total_size(4) /* RTA_IIF */
4392 + nla_total_size(4) /* RTA_OIF */
4393 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4394 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4395 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4396 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4397 + nla_total_size(1) /* RTA_PREF */
beb1afac
DA
4398 + lwtunnel_get_encap_size(rt->dst.lwtstate)
4399 + nexthop_len;
4400}
4401
4402static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
5be083ce 4403 unsigned int *flags, bool skip_oif)
beb1afac 4404{
f9d882ea
IS
4405 if (rt->rt6i_nh_flags & RTNH_F_DEAD)
4406 *flags |= RTNH_F_DEAD;
4407
44c9f2f2 4408 if (rt->rt6i_nh_flags & RTNH_F_LINKDOWN) {
beb1afac
DA
4409 *flags |= RTNH_F_LINKDOWN;
4410 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
4411 *flags |= RTNH_F_DEAD;
4412 }
4413
4414 if (rt->rt6i_flags & RTF_GATEWAY) {
4415 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
4416 goto nla_put_failure;
4417 }
4418
fc1e64e1 4419 *flags |= (rt->rt6i_nh_flags & RTNH_F_ONLINK);
fe400799 4420 if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
4421 *flags |= RTNH_F_OFFLOAD;
4422
5be083ce
DA
4423 /* not needed for multipath encoding b/c it has a rtnexthop struct */
4424 if (!skip_oif && rt->dst.dev &&
beb1afac
DA
4425 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
4426 goto nla_put_failure;
4427
4428 if (rt->dst.lwtstate &&
4429 lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
4430 goto nla_put_failure;
4431
4432 return 0;
4433
4434nla_put_failure:
4435 return -EMSGSIZE;
4436}
4437
5be083ce 4438/* add multipath next hop */
beb1afac
DA
4439static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
4440{
4441 struct rtnexthop *rtnh;
4442 unsigned int flags = 0;
4443
4444 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4445 if (!rtnh)
4446 goto nla_put_failure;
4447
398958ae 4448 rtnh->rtnh_hops = rt->rt6i_nh_weight - 1;
beb1afac
DA
4449 rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
4450
5be083ce 4451 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
4452 goto nla_put_failure;
4453
4454 rtnh->rtnh_flags = flags;
4455
4456 /* length of rtnetlink header + attributes */
4457 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4458
4459 return 0;
4460
4461nla_put_failure:
4462 return -EMSGSIZE;
339bf98f
TG
4463}
4464
191cd582
BH
4465static int rt6_fill_node(struct net *net,
4466 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 4467 struct in6_addr *dst, struct in6_addr *src,
15e47304 4468 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4469 unsigned int flags)
1da177e4 4470{
4b32b5ad 4471 u32 metrics[RTAX_MAX];
1da177e4 4472 struct rtmsg *rtm;
2d7202bf 4473 struct nlmsghdr *nlh;
e3703b3d 4474 long expires;
9e762a4a 4475 u32 table;
1da177e4 4476
15e47304 4477 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4478 if (!nlh)
26932566 4479 return -EMSGSIZE;
2d7202bf
TG
4480
4481 rtm = nlmsg_data(nlh);
1da177e4
LT
4482 rtm->rtm_family = AF_INET6;
4483 rtm->rtm_dst_len = rt->rt6i_dst.plen;
4484 rtm->rtm_src_len = rt->rt6i_src.plen;
4485 rtm->rtm_tos = 0;
c71099ac 4486 if (rt->rt6i_table)
9e762a4a 4487 table = rt->rt6i_table->tb6_id;
c71099ac 4488 else
9e762a4a
PM
4489 table = RT6_TABLE_UNSPEC;
4490 rtm->rtm_table = table;
c78679e8
DM
4491 if (nla_put_u32(skb, RTA_TABLE, table))
4492 goto nla_put_failure;
ef2c7d7b
ND
4493 if (rt->rt6i_flags & RTF_REJECT) {
4494 switch (rt->dst.error) {
4495 case -EINVAL:
4496 rtm->rtm_type = RTN_BLACKHOLE;
4497 break;
4498 case -EACCES:
4499 rtm->rtm_type = RTN_PROHIBIT;
4500 break;
b4949ab2
ND
4501 case -EAGAIN:
4502 rtm->rtm_type = RTN_THROW;
4503 break;
ef2c7d7b
ND
4504 default:
4505 rtm->rtm_type = RTN_UNREACHABLE;
4506 break;
4507 }
4508 }
38308473 4509 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 4510 rtm->rtm_type = RTN_LOCAL;
4ee39733
DA
4511 else if (rt->rt6i_flags & RTF_ANYCAST)
4512 rtm->rtm_type = RTN_ANYCAST;
d1918542 4513 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
4514 rtm->rtm_type = RTN_LOCAL;
4515 else
4516 rtm->rtm_type = RTN_UNICAST;
4517 rtm->rtm_flags = 0;
4518 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4519 rtm->rtm_protocol = rt->rt6i_protocol;
1da177e4 4520
38308473 4521 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
4522 rtm->rtm_flags |= RTM_F_CLONED;
4523
4524 if (dst) {
930345ea 4525 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 4526 goto nla_put_failure;
1ab1457c 4527 rtm->rtm_dst_len = 128;
1da177e4 4528 } else if (rtm->rtm_dst_len)
930345ea 4529 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 4530 goto nla_put_failure;
1da177e4
LT
4531#ifdef CONFIG_IPV6_SUBTREES
4532 if (src) {
930345ea 4533 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4534 goto nla_put_failure;
1ab1457c 4535 rtm->rtm_src_len = 128;
c78679e8 4536 } else if (rtm->rtm_src_len &&
930345ea 4537 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 4538 goto nla_put_failure;
1da177e4 4539#endif
7bc570c8
YH
4540 if (iif) {
4541#ifdef CONFIG_IPV6_MROUTE
4542 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
fd61c6ba
DA
4543 int err = ip6mr_get_route(net, skb, rtm, portid);
4544
4545 if (err == 0)
4546 return 0;
4547 if (err < 0)
4548 goto nla_put_failure;
7bc570c8
YH
4549 } else
4550#endif
c78679e8
DM
4551 if (nla_put_u32(skb, RTA_IIF, iif))
4552 goto nla_put_failure;
7bc570c8 4553 } else if (dst) {
1da177e4 4554 struct in6_addr saddr_buf;
c78679e8 4555 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 4556 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4557 goto nla_put_failure;
1da177e4 4558 }
2d7202bf 4559
c3968a85
DW
4560 if (rt->rt6i_prefsrc.plen) {
4561 struct in6_addr saddr_buf;
4e3fd7a0 4562 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 4563 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4564 goto nla_put_failure;
c3968a85
DW
4565 }
4566
4b32b5ad
MKL
4567 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
4568 if (rt->rt6i_pmtu)
4569 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
4570 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
4571 goto nla_put_failure;
4572
c78679e8
DM
4573 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
4574 goto nla_put_failure;
8253947e 4575
beb1afac
DA
4576 /* For multipath routes, walk the siblings list and add
4577 * each as a nexthop within RTA_MULTIPATH.
4578 */
4579 if (rt->rt6i_nsiblings) {
4580 struct rt6_info *sibling, *next_sibling;
4581 struct nlattr *mp;
4582
4583 mp = nla_nest_start(skb, RTA_MULTIPATH);
4584 if (!mp)
4585 goto nla_put_failure;
4586
4587 if (rt6_add_nexthop(skb, rt) < 0)
4588 goto nla_put_failure;
4589
4590 list_for_each_entry_safe(sibling, next_sibling,
4591 &rt->rt6i_siblings, rt6i_siblings) {
4592 if (rt6_add_nexthop(skb, sibling) < 0)
4593 goto nla_put_failure;
4594 }
4595
4596 nla_nest_end(skb, mp);
4597 } else {
5be083ce 4598 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
4599 goto nla_put_failure;
4600 }
4601
8253947e 4602 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 4603
87a50699 4604 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 4605 goto nla_put_failure;
2d7202bf 4606
c78ba6d6
LR
4607 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
4608 goto nla_put_failure;
4609
19e42e45 4610
053c095a
JB
4611 nlmsg_end(skb, nlh);
4612 return 0;
2d7202bf
TG
4613
4614nla_put_failure:
26932566
PM
4615 nlmsg_cancel(skb, nlh);
4616 return -EMSGSIZE;
1da177e4
LT
4617}
4618
1b43af54 4619int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
4620{
4621 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1f17e2f2
DA
4622 struct net *net = arg->net;
4623
4624 if (rt == net->ipv6.ip6_null_entry)
4625 return 0;
1da177e4 4626
2d7202bf
TG
4627 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4628 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
f8cfe2ce
DA
4629
4630 /* user wants prefix routes only */
4631 if (rtm->rtm_flags & RTM_F_PREFIX &&
4632 !(rt->rt6i_flags & RTF_PREFIX_RT)) {
4633 /* success since this is not a prefix route */
4634 return 1;
4635 }
4636 }
1da177e4 4637
1f17e2f2 4638 return rt6_fill_node(net,
191cd582 4639 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 4640 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
f8cfe2ce 4641 NLM_F_MULTI);
1da177e4
LT
4642}
4643
c21ef3e3
DA
4644static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4645 struct netlink_ext_ack *extack)
1da177e4 4646{
3b1e0a65 4647 struct net *net = sock_net(in_skb->sk);
ab364a6f 4648 struct nlattr *tb[RTA_MAX+1];
18c3a61c
RP
4649 int err, iif = 0, oif = 0;
4650 struct dst_entry *dst;
ab364a6f 4651 struct rt6_info *rt;
1da177e4 4652 struct sk_buff *skb;
ab364a6f 4653 struct rtmsg *rtm;
4c9483b2 4654 struct flowi6 fl6;
18c3a61c 4655 bool fibmatch;
1da177e4 4656
fceb6435 4657 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 4658 extack);
ab364a6f
TG
4659 if (err < 0)
4660 goto errout;
1da177e4 4661
ab364a6f 4662 err = -EINVAL;
4c9483b2 4663 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
4664 rtm = nlmsg_data(nlh);
4665 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4666 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4667
ab364a6f
TG
4668 if (tb[RTA_SRC]) {
4669 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4670 goto errout;
4671
4e3fd7a0 4672 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4673 }
4674
4675 if (tb[RTA_DST]) {
4676 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4677 goto errout;
4678
4e3fd7a0 4679 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4680 }
4681
4682 if (tb[RTA_IIF])
4683 iif = nla_get_u32(tb[RTA_IIF]);
4684
4685 if (tb[RTA_OIF])
72331bc0 4686 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4687
2e47b291
LC
4688 if (tb[RTA_MARK])
4689 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4690
622ec2c9
LC
4691 if (tb[RTA_UID])
4692 fl6.flowi6_uid = make_kuid(current_user_ns(),
4693 nla_get_u32(tb[RTA_UID]));
4694 else
4695 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4696
1da177e4
LT
4697 if (iif) {
4698 struct net_device *dev;
72331bc0
SL
4699 int flags = 0;
4700
121622db
FW
4701 rcu_read_lock();
4702
4703 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4704 if (!dev) {
121622db 4705 rcu_read_unlock();
1da177e4 4706 err = -ENODEV;
ab364a6f 4707 goto errout;
1da177e4 4708 }
72331bc0
SL
4709
4710 fl6.flowi6_iif = iif;
4711
4712 if (!ipv6_addr_any(&fl6.saddr))
4713 flags |= RT6_LOOKUP_F_HAS_SADDR;
4714
b75cc8f9 4715 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
121622db
FW
4716
4717 rcu_read_unlock();
72331bc0
SL
4718 } else {
4719 fl6.flowi6_oif = oif;
4720
58acfd71 4721 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
4722 }
4723
18c3a61c
RP
4724
4725 rt = container_of(dst, struct rt6_info, dst);
4726 if (rt->dst.error) {
4727 err = rt->dst.error;
4728 ip6_rt_put(rt);
4729 goto errout;
1da177e4
LT
4730 }
4731
9d6acb3b
WC
4732 if (rt == net->ipv6.ip6_null_entry) {
4733 err = rt->dst.error;
4734 ip6_rt_put(rt);
4735 goto errout;
4736 }
4737
fba961ab
DM
4738 if (fibmatch && rt->from) {
4739 struct rt6_info *ort = rt->from;
58acfd71
IS
4740
4741 dst_hold(&ort->dst);
4742 ip6_rt_put(rt);
4743 rt = ort;
4744 }
4745
ab364a6f 4746 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4747 if (!skb) {
94e187c0 4748 ip6_rt_put(rt);
ab364a6f
TG
4749 err = -ENOBUFS;
4750 goto errout;
4751 }
1da177e4 4752
d8d1f30b 4753 skb_dst_set(skb, &rt->dst);
18c3a61c
RP
4754 if (fibmatch)
4755 err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
4756 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4757 nlh->nlmsg_seq, 0);
4758 else
4759 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
4760 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4761 nlh->nlmsg_seq, 0);
1da177e4 4762 if (err < 0) {
ab364a6f
TG
4763 kfree_skb(skb);
4764 goto errout;
1da177e4
LT
4765 }
4766
15e47304 4767 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 4768errout:
1da177e4 4769 return err;
1da177e4
LT
4770}
4771
37a1d361
RP
4772void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
4773 unsigned int nlm_flags)
1da177e4
LT
4774{
4775 struct sk_buff *skb;
5578689a 4776 struct net *net = info->nl_net;
528c4ceb
DL
4777 u32 seq;
4778 int err;
4779
4780 err = -ENOBUFS;
38308473 4781 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 4782
19e42e45 4783 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 4784 if (!skb)
21713ebc
TG
4785 goto errout;
4786
191cd582 4787 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
f8cfe2ce 4788 event, info->portid, seq, nlm_flags);
26932566
PM
4789 if (err < 0) {
4790 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4791 WARN_ON(err == -EMSGSIZE);
4792 kfree_skb(skb);
4793 goto errout;
4794 }
15e47304 4795 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
4796 info->nlh, gfp_any());
4797 return;
21713ebc
TG
4798errout:
4799 if (err < 0)
5578689a 4800 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
4801}
4802
8ed67789 4803static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 4804 unsigned long event, void *ptr)
8ed67789 4805{
351638e7 4806 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 4807 struct net *net = dev_net(dev);
8ed67789 4808
242d3a49
WC
4809 if (!(dev->flags & IFF_LOOPBACK))
4810 return NOTIFY_OK;
4811
4812 if (event == NETDEV_REGISTER) {
d8d1f30b 4813 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
4814 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
4815#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 4816 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 4817 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 4818 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 4819 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 4820#endif
76da0704
WC
4821 } else if (event == NETDEV_UNREGISTER &&
4822 dev->reg_state != NETREG_UNREGISTERED) {
4823 /* NETDEV_UNREGISTER could be fired for multiple times by
4824 * netdev_wait_allrefs(). Make sure we only call this once.
4825 */
12d94a80 4826 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 4827#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
4828 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
4829 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
4830#endif
4831 }
4832
4833 return NOTIFY_OK;
4834}
4835
1da177e4
LT
4836/*
4837 * /proc
4838 */
4839
4840#ifdef CONFIG_PROC_FS
4841
33120b30 4842static const struct file_operations ipv6_route_proc_fops = {
33120b30
AD
4843 .open = ipv6_route_open,
4844 .read = seq_read,
4845 .llseek = seq_lseek,
8d2ca1d7 4846 .release = seq_release_net,
33120b30
AD
4847};
4848
1da177e4
LT
4849static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4850{
69ddb805 4851 struct net *net = (struct net *)seq->private;
1da177e4 4852 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
4853 net->ipv6.rt6_stats->fib_nodes,
4854 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 4855 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
4856 net->ipv6.rt6_stats->fib_rt_entries,
4857 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 4858 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 4859 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
4860
4861 return 0;
4862}
4863
4864static int rt6_stats_seq_open(struct inode *inode, struct file *file)
4865{
de05c557 4866 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
4867}
4868
9a32144e 4869static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
4870 .open = rt6_stats_seq_open,
4871 .read = seq_read,
4872 .llseek = seq_lseek,
b6fcbdb4 4873 .release = single_release_net,
1da177e4
LT
4874};
4875#endif /* CONFIG_PROC_FS */
4876
4877#ifdef CONFIG_SYSCTL
4878
1da177e4 4879static
fe2c6338 4880int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
4881 void __user *buffer, size_t *lenp, loff_t *ppos)
4882{
c486da34
LAG
4883 struct net *net;
4884 int delay;
4885 if (!write)
1da177e4 4886 return -EINVAL;
c486da34
LAG
4887
4888 net = (struct net *)ctl->extra1;
4889 delay = net->ipv6.sysctl.flush_delay;
4890 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 4891 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 4892 return 0;
1da177e4
LT
4893}
4894
fe2c6338 4895struct ctl_table ipv6_route_table_template[] = {
1ab1457c 4896 {
1da177e4 4897 .procname = "flush",
4990509f 4898 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 4899 .maxlen = sizeof(int),
89c8b3a1 4900 .mode = 0200,
6d9f239a 4901 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
4902 },
4903 {
1da177e4 4904 .procname = "gc_thresh",
9a7ec3a9 4905 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
4906 .maxlen = sizeof(int),
4907 .mode = 0644,
6d9f239a 4908 .proc_handler = proc_dointvec,
1da177e4
LT
4909 },
4910 {
1da177e4 4911 .procname = "max_size",
4990509f 4912 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
4913 .maxlen = sizeof(int),
4914 .mode = 0644,
6d9f239a 4915 .proc_handler = proc_dointvec,
1da177e4
LT
4916 },
4917 {
1da177e4 4918 .procname = "gc_min_interval",
4990509f 4919 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
4920 .maxlen = sizeof(int),
4921 .mode = 0644,
6d9f239a 4922 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4923 },
4924 {
1da177e4 4925 .procname = "gc_timeout",
4990509f 4926 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
4927 .maxlen = sizeof(int),
4928 .mode = 0644,
6d9f239a 4929 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4930 },
4931 {
1da177e4 4932 .procname = "gc_interval",
4990509f 4933 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
4934 .maxlen = sizeof(int),
4935 .mode = 0644,
6d9f239a 4936 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4937 },
4938 {
1da177e4 4939 .procname = "gc_elasticity",
4990509f 4940 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
4941 .maxlen = sizeof(int),
4942 .mode = 0644,
f3d3f616 4943 .proc_handler = proc_dointvec,
1da177e4
LT
4944 },
4945 {
1da177e4 4946 .procname = "mtu_expires",
4990509f 4947 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
4948 .maxlen = sizeof(int),
4949 .mode = 0644,
6d9f239a 4950 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4951 },
4952 {
1da177e4 4953 .procname = "min_adv_mss",
4990509f 4954 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
4955 .maxlen = sizeof(int),
4956 .mode = 0644,
f3d3f616 4957 .proc_handler = proc_dointvec,
1da177e4
LT
4958 },
4959 {
1da177e4 4960 .procname = "gc_min_interval_ms",
4990509f 4961 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
4962 .maxlen = sizeof(int),
4963 .mode = 0644,
6d9f239a 4964 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 4965 },
f8572d8f 4966 { }
1da177e4
LT
4967};
4968
2c8c1e72 4969struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
4970{
4971 struct ctl_table *table;
4972
4973 table = kmemdup(ipv6_route_table_template,
4974 sizeof(ipv6_route_table_template),
4975 GFP_KERNEL);
5ee09105
YH
4976
4977 if (table) {
4978 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 4979 table[0].extra1 = net;
86393e52 4980 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
4981 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
4982 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
4983 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
4984 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
4985 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
4986 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
4987 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 4988 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
4989
4990 /* Don't export sysctls to unprivileged users */
4991 if (net->user_ns != &init_user_ns)
4992 table[0].procname = NULL;
5ee09105
YH
4993 }
4994
760f2d01
DL
4995 return table;
4996}
1da177e4
LT
4997#endif
4998
2c8c1e72 4999static int __net_init ip6_route_net_init(struct net *net)
cdb18761 5000{
633d424b 5001 int ret = -ENOMEM;
8ed67789 5002
86393e52
AD
5003 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5004 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 5005
fc66f95c
ED
5006 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5007 goto out_ip6_dst_ops;
5008
8ed67789
DL
5009 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5010 sizeof(*net->ipv6.ip6_null_entry),
5011 GFP_KERNEL);
5012 if (!net->ipv6.ip6_null_entry)
fc66f95c 5013 goto out_ip6_dst_entries;
d8d1f30b 5014 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5015 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5016 ip6_template_metrics, true);
8ed67789
DL
5017
5018#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 5019 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
5020 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5021 sizeof(*net->ipv6.ip6_prohibit_entry),
5022 GFP_KERNEL);
68fffc67
PZ
5023 if (!net->ipv6.ip6_prohibit_entry)
5024 goto out_ip6_null_entry;
d8d1f30b 5025 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5026 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5027 ip6_template_metrics, true);
8ed67789
DL
5028
5029 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5030 sizeof(*net->ipv6.ip6_blk_hole_entry),
5031 GFP_KERNEL);
68fffc67
PZ
5032 if (!net->ipv6.ip6_blk_hole_entry)
5033 goto out_ip6_prohibit_entry;
d8d1f30b 5034 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5035 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5036 ip6_template_metrics, true);
8ed67789
DL
5037#endif
5038
b339a47c
PZ
5039 net->ipv6.sysctl.flush_delay = 0;
5040 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5041 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5042 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5043 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5044 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5045 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5046 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5047
6891a346
BT
5048 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5049
8ed67789
DL
5050 ret = 0;
5051out:
5052 return ret;
f2fc6a54 5053
68fffc67
PZ
5054#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5055out_ip6_prohibit_entry:
5056 kfree(net->ipv6.ip6_prohibit_entry);
5057out_ip6_null_entry:
5058 kfree(net->ipv6.ip6_null_entry);
5059#endif
fc66f95c
ED
5060out_ip6_dst_entries:
5061 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 5062out_ip6_dst_ops:
f2fc6a54 5063 goto out;
cdb18761
DL
5064}
5065
2c8c1e72 5066static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 5067{
8ed67789
DL
5068 kfree(net->ipv6.ip6_null_entry);
5069#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5070 kfree(net->ipv6.ip6_prohibit_entry);
5071 kfree(net->ipv6.ip6_blk_hole_entry);
5072#endif
41bb78b4 5073 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
5074}
5075
d189634e
TG
5076static int __net_init ip6_route_net_init_late(struct net *net)
5077{
5078#ifdef CONFIG_PROC_FS
d4beaa66 5079 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
d6444062 5080 proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
5081#endif
5082 return 0;
5083}
5084
5085static void __net_exit ip6_route_net_exit_late(struct net *net)
5086{
5087#ifdef CONFIG_PROC_FS
ece31ffd
G
5088 remove_proc_entry("ipv6_route", net->proc_net);
5089 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
5090#endif
5091}
5092
cdb18761
DL
5093static struct pernet_operations ip6_route_net_ops = {
5094 .init = ip6_route_net_init,
5095 .exit = ip6_route_net_exit,
5096};
5097
c3426b47
DM
5098static int __net_init ipv6_inetpeer_init(struct net *net)
5099{
5100 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5101
5102 if (!bp)
5103 return -ENOMEM;
5104 inet_peer_base_init(bp);
5105 net->ipv6.peers = bp;
5106 return 0;
5107}
5108
5109static void __net_exit ipv6_inetpeer_exit(struct net *net)
5110{
5111 struct inet_peer_base *bp = net->ipv6.peers;
5112
5113 net->ipv6.peers = NULL;
56a6b248 5114 inetpeer_invalidate_tree(bp);
c3426b47
DM
5115 kfree(bp);
5116}
5117
2b823f72 5118static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
5119 .init = ipv6_inetpeer_init,
5120 .exit = ipv6_inetpeer_exit,
5121};
5122
d189634e
TG
5123static struct pernet_operations ip6_route_net_late_ops = {
5124 .init = ip6_route_net_init_late,
5125 .exit = ip6_route_net_exit_late,
5126};
5127
8ed67789
DL
5128static struct notifier_block ip6_route_dev_notifier = {
5129 .notifier_call = ip6_route_dev_notify,
242d3a49 5130 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5131};
5132
2f460933
WC
5133void __init ip6_route_init_special_entries(void)
5134{
5135 /* Registering of the loopback is done before this portion of code,
5136 * the loopback reference in rt6_info will not be taken, do it
5137 * manually for init_net */
5138 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5139 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5140 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5141 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5142 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5143 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5144 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5145 #endif
5146}
5147
433d49c3 5148int __init ip6_route_init(void)
1da177e4 5149{
433d49c3 5150 int ret;
8d0b94af 5151 int cpu;
433d49c3 5152
9a7ec3a9
DL
5153 ret = -ENOMEM;
5154 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5155 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5156 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5157 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5158 goto out;
14e50e57 5159
fc66f95c 5160 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5161 if (ret)
bdb3289f 5162 goto out_kmem_cache;
bdb3289f 5163
c3426b47
DM
5164 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5165 if (ret)
e8803b6c 5166 goto out_dst_entries;
2a0c451a 5167
7e52b33b
DM
5168 ret = register_pernet_subsys(&ip6_route_net_ops);
5169 if (ret)
5170 goto out_register_inetpeer;
c3426b47 5171
5dc121e9
AE
5172 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5173
e8803b6c 5174 ret = fib6_init();
433d49c3 5175 if (ret)
8ed67789 5176 goto out_register_subsys;
433d49c3 5177
433d49c3
DL
5178 ret = xfrm6_init();
5179 if (ret)
e8803b6c 5180 goto out_fib6_init;
c35b7e72 5181
433d49c3
DL
5182 ret = fib6_rules_init();
5183 if (ret)
5184 goto xfrm6_init;
7e5449c2 5185
d189634e
TG
5186 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5187 if (ret)
5188 goto fib6_rules_init;
5189
16feebcf
FW
5190 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5191 inet6_rtm_newroute, NULL, 0);
5192 if (ret < 0)
5193 goto out_register_late_subsys;
5194
5195 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5196 inet6_rtm_delroute, NULL, 0);
5197 if (ret < 0)
5198 goto out_register_late_subsys;
5199
5200 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5201 inet6_rtm_getroute, NULL,
5202 RTNL_FLAG_DOIT_UNLOCKED);
5203 if (ret < 0)
d189634e 5204 goto out_register_late_subsys;
c127ea2c 5205
8ed67789 5206 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5207 if (ret)
d189634e 5208 goto out_register_late_subsys;
8ed67789 5209
8d0b94af
MKL
5210 for_each_possible_cpu(cpu) {
5211 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5212
5213 INIT_LIST_HEAD(&ul->head);
5214 spin_lock_init(&ul->lock);
5215 }
5216
433d49c3
DL
5217out:
5218 return ret;
5219
d189634e 5220out_register_late_subsys:
16feebcf 5221 rtnl_unregister_all(PF_INET6);
d189634e 5222 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5223fib6_rules_init:
433d49c3
DL
5224 fib6_rules_cleanup();
5225xfrm6_init:
433d49c3 5226 xfrm6_fini();
2a0c451a
TG
5227out_fib6_init:
5228 fib6_gc_cleanup();
8ed67789
DL
5229out_register_subsys:
5230 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5231out_register_inetpeer:
5232 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5233out_dst_entries:
5234 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5235out_kmem_cache:
f2fc6a54 5236 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5237 goto out;
1da177e4
LT
5238}
5239
5240void ip6_route_cleanup(void)
5241{
8ed67789 5242 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5243 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5244 fib6_rules_cleanup();
1da177e4 5245 xfrm6_fini();
1da177e4 5246 fib6_gc_cleanup();
c3426b47 5247 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5248 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5249 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5250 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5251}