]> git.ipfire.org Git - thirdparty/kernel/stable.git/blame - net/ipv6/route.c
ipv6: route: enforce RCU protection in rt6_update_exception_stamp_rt()
[thirdparty/kernel/stable.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
eacb9384 66#include <net/ip.h>
7c0f6ba6 67#include <linux/uaccess.h>
1da177e4
LT
68
69#ifdef CONFIG_SYSCTL
70#include <linux/sysctl.h>
71#endif
72
30d444d3
DA
73static int ip6_rt_type_to_error(u8 fib6_type);
74
75#define CREATE_TRACE_POINTS
76#include <trace/events/fib6.h>
77EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
78#undef CREATE_TRACE_POINTS
79
/*
 * Result of checking a route's next hop.  All failure codes are negative
 * so rt6_score_route() can return them in place of a score:
 *   RT6_NUD_FAIL_HARD  - find_match() skips the route,
 *   RT6_NUD_FAIL_PROBE - may still reach the score comparison in find_match(),
 *   RT6_NUD_FAIL_DO_RR - find_match() scores the route 0 and asks for
 *                        round-robin.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD  = -3,
	RT6_NUD_FAIL_PROBE = -2,
	RT6_NUD_FAIL_DO_RR = -1,
	RT6_NUD_SUCCEED    = 1
};
86
1da177e4 87static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 88static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 89static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
569d3645 94static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
95
96static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 97static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 98static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 99static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 100static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
101static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
102 struct sk_buff *skb, u32 mtu);
103static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
104 struct sk_buff *skb);
8d1c802b
DA
105static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
106static size_t rt6_nlmsg_size(struct fib6_info *rt);
d4ead6b3 107static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 108 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 109 struct in6_addr *dest, struct in6_addr *src,
16a16cd3
DA
110 int iif, int type, u32 portid, u32 seq,
111 unsigned int flags);
8d1c802b 112static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
113 struct in6_addr *daddr,
114 struct in6_addr *saddr);
1da177e4 115
70ceb4f5 116#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 117static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 118 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
119 const struct in6_addr *gwaddr,
120 struct net_device *dev,
95c96174 121 unsigned int pref);
8d1c802b 122static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 123 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
124 const struct in6_addr *gwaddr,
125 struct net_device *dev);
70ceb4f5
YH
126#endif
127
8d0b94af
MKL
128struct uncached_list {
129 spinlock_t lock;
130 struct list_head head;
131};
132
133static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
134
510c321b 135void rt6_uncached_list_add(struct rt6_info *rt)
8d0b94af
MKL
136{
137 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
138
8d0b94af
MKL
139 rt->rt6i_uncached_list = ul;
140
141 spin_lock_bh(&ul->lock);
142 list_add_tail(&rt->rt6i_uncached, &ul->head);
143 spin_unlock_bh(&ul->lock);
144}
145
510c321b 146void rt6_uncached_list_del(struct rt6_info *rt)
8d0b94af
MKL
147{
148 if (!list_empty(&rt->rt6i_uncached)) {
149 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 150 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
151
152 spin_lock_bh(&ul->lock);
153 list_del(&rt->rt6i_uncached);
81eb8447 154 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
155 spin_unlock_bh(&ul->lock);
156 }
157}
158
159static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
160{
161 struct net_device *loopback_dev = net->loopback_dev;
162 int cpu;
163
e332bc67
EB
164 if (dev == loopback_dev)
165 return;
166
8d0b94af
MKL
167 for_each_possible_cpu(cpu) {
168 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
169 struct rt6_info *rt;
170
171 spin_lock_bh(&ul->lock);
172 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
173 struct inet6_dev *rt_idev = rt->rt6i_idev;
174 struct net_device *rt_dev = rt->dst.dev;
175
e332bc67 176 if (rt_idev->dev == dev) {
8d0b94af
MKL
177 rt->rt6i_idev = in6_dev_get(loopback_dev);
178 in6_dev_put(rt_idev);
179 }
180
e332bc67 181 if (rt_dev == dev) {
8d0b94af
MKL
182 rt->dst.dev = loopback_dev;
183 dev_hold(rt->dst.dev);
184 dev_put(rt_dev);
185 }
186 }
187 spin_unlock_bh(&ul->lock);
188 }
189}
190
f8a1b43b 191static inline const void *choose_neigh_daddr(const struct in6_addr *p,
f894cbf8
DM
192 struct sk_buff *skb,
193 const void *daddr)
39232973 194{
a7563f34 195 if (!ipv6_addr_any(p))
39232973 196 return (const void *) p;
f894cbf8
DM
197 else if (skb)
198 return &ipv6_hdr(skb)->daddr;
39232973
DM
199 return daddr;
200}
201
f8a1b43b
DA
202struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
203 struct net_device *dev,
204 struct sk_buff *skb,
205 const void *daddr)
d3aaeb38 206{
39232973
DM
207 struct neighbour *n;
208
f8a1b43b
DA
209 daddr = choose_neigh_daddr(gw, skb, daddr);
210 n = __ipv6_neigh_lookup(dev, daddr);
f83c7790
DM
211 if (n)
212 return n;
f8a1b43b
DA
213 return neigh_create(&nd_tbl, daddr, dev);
214}
215
216static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
217 struct sk_buff *skb,
218 const void *daddr)
219{
220 const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
221
222 return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
f83c7790
DM
223}
224
63fca65d
JA
225static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
226{
227 struct net_device *dev = dst->dev;
228 struct rt6_info *rt = (struct rt6_info *)dst;
229
f8a1b43b 230 daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
63fca65d
JA
231 if (!daddr)
232 return;
233 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
234 return;
235 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
236 return;
237 __ipv6_confirm_neigh(dev, daddr);
238}
239
9a7ec3a9 240static struct dst_ops ip6_dst_ops_template = {
1da177e4 241 .family = AF_INET6,
1da177e4
LT
242 .gc = ip6_dst_gc,
243 .gc_thresh = 1024,
244 .check = ip6_dst_check,
0dbaee3b 245 .default_advmss = ip6_default_advmss,
ebb762f2 246 .mtu = ip6_mtu,
d4ead6b3 247 .cow_metrics = dst_cow_metrics_generic,
1da177e4
LT
248 .destroy = ip6_dst_destroy,
249 .ifdown = ip6_dst_ifdown,
250 .negative_advice = ip6_negative_advice,
251 .link_failure = ip6_link_failure,
252 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 253 .redirect = rt6_do_redirect,
9f8955cc 254 .local_out = __ip6_local_out,
f8a1b43b 255 .neigh_lookup = ip6_dst_neigh_lookup,
63fca65d 256 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
257};
258
ebb762f2 259static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 260{
618f9bc7
SK
261 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
262
263 return mtu ? : dst->dev->mtu;
ec831ea7
RD
264}
265
6700c270
DM
266static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
267 struct sk_buff *skb, u32 mtu)
14e50e57
DM
268{
269}
270
6700c270
DM
/* dst_ops->redirect for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* no-op */
}
275
14e50e57
DM
276static struct dst_ops ip6_dst_blackhole_ops = {
277 .family = AF_INET6,
14e50e57
DM
278 .destroy = ip6_dst_destroy,
279 .check = ip6_dst_check,
ebb762f2 280 .mtu = ip6_blackhole_mtu,
214f45c9 281 .default_advmss = ip6_default_advmss,
14e50e57 282 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 283 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 284 .cow_metrics = dst_cow_metrics_generic,
f8a1b43b 285 .neigh_lookup = ip6_dst_neigh_lookup,
14e50e57
DM
286};
287
62fa8a84 288static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 289 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
290};
291
8d1c802b 292static const struct fib6_info fib6_null_entry_template = {
93c2fb25
DA
293 .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP),
294 .fib6_protocol = RTPROT_KERNEL,
295 .fib6_metric = ~(u32)0,
296 .fib6_ref = ATOMIC_INIT(1),
421842ed
DA
297 .fib6_type = RTN_UNREACHABLE,
298 .fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
299};
300
fb0af4c7 301static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
302 .dst = {
303 .__refcnt = ATOMIC_INIT(1),
304 .__use = 1,
2c20cbd7 305 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 306 .error = -ENETUNREACH,
d8d1f30b
CG
307 .input = ip6_pkt_discard,
308 .output = ip6_pkt_discard_out,
1da177e4
LT
309 },
310 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
1da177e4
LT
311};
312
101367c2
TG
313#ifdef CONFIG_IPV6_MULTIPLE_TABLES
314
fb0af4c7 315static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
316 .dst = {
317 .__refcnt = ATOMIC_INIT(1),
318 .__use = 1,
2c20cbd7 319 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 320 .error = -EACCES,
d8d1f30b
CG
321 .input = ip6_pkt_prohibit,
322 .output = ip6_pkt_prohibit_out,
101367c2
TG
323 },
324 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
325};
326
fb0af4c7 327static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
328 .dst = {
329 .__refcnt = ATOMIC_INIT(1),
330 .__use = 1,
2c20cbd7 331 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 332 .error = -EINVAL,
d8d1f30b 333 .input = dst_discard,
ede2059d 334 .output = dst_discard_out,
101367c2
TG
335 },
336 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
337};
338
339#endif
340
ebfa45f0
MKL
341static void rt6_info_init(struct rt6_info *rt)
342{
343 struct dst_entry *dst = &rt->dst;
344
345 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
ebfa45f0
MKL
346 INIT_LIST_HEAD(&rt->rt6i_uncached);
347}
348
1da177e4 349/* allocate dst with ip6_dst_ops */
93531c67
DA
350struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
351 int flags)
1da177e4 352{
97bab73f 353 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 354 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 355
81eb8447 356 if (rt) {
ebfa45f0 357 rt6_info_init(rt);
81eb8447
WW
358 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
359 }
8104891b 360
cf911662 361 return rt;
1da177e4 362}
9ab179d8 363EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 364
1da177e4
LT
365static void ip6_dst_destroy(struct dst_entry *dst)
366{
367 struct rt6_info *rt = (struct rt6_info *)dst;
a68886a6 368 struct fib6_info *from;
8d0b94af 369 struct inet6_dev *idev;
1da177e4 370
1620a336 371 ip_dst_metrics_put(dst);
8d0b94af
MKL
372 rt6_uncached_list_del(rt);
373
374 idev = rt->rt6i_idev;
38308473 375 if (idev) {
1da177e4
LT
376 rt->rt6i_idev = NULL;
377 in6_dev_put(idev);
1ab1457c 378 }
1716a961 379
a68886a6
DA
380 rcu_read_lock();
381 from = rcu_dereference(rt->from);
382 rcu_assign_pointer(rt->from, NULL);
93531c67 383 fib6_info_release(from);
a68886a6 384 rcu_read_unlock();
b3419363
DM
385}
386
1da177e4
LT
387static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
388 int how)
389{
390 struct rt6_info *rt = (struct rt6_info *)dst;
391 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 392 struct net_device *loopback_dev =
c346dca1 393 dev_net(dev)->loopback_dev;
1da177e4 394
e5645f51
WW
395 if (idev && idev->dev != loopback_dev) {
396 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
397 if (loopback_idev) {
398 rt->rt6i_idev = loopback_idev;
399 in6_dev_put(idev);
97cac082 400 }
1da177e4
LT
401 }
402}
403
5973fb1e
MKL
404static bool __rt6_check_expired(const struct rt6_info *rt)
405{
406 if (rt->rt6i_flags & RTF_EXPIRES)
407 return time_after(jiffies, rt->dst.expires);
408 else
409 return false;
410}
411
a50feda5 412static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 413{
a68886a6
DA
414 struct fib6_info *from;
415
416 from = rcu_dereference(rt->from);
417
1716a961
G
418 if (rt->rt6i_flags & RTF_EXPIRES) {
419 if (time_after(jiffies, rt->dst.expires))
a50feda5 420 return true;
a68886a6 421 } else if (from) {
1e2ea8ad 422 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
a68886a6 423 fib6_check_expired(from);
1716a961 424 }
a50feda5 425 return false;
1da177e4
LT
426}
427
3b290a31
DA
428struct fib6_info *fib6_multipath_select(const struct net *net,
429 struct fib6_info *match,
430 struct flowi6 *fl6, int oif,
431 const struct sk_buff *skb,
432 int strict)
51ebd318 433{
8d1c802b 434 struct fib6_info *sibling, *next_sibling;
51ebd318 435
b673d6cc
JS
436 /* We might have already computed the hash for ICMPv6 errors. In such
437 * case it will always be non-zero. Otherwise now is the time to do it.
438 */
439 if (!fl6->mp_hash)
b4bac172 440 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
b673d6cc 441
5e670d84 442 if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
3d709f69
IS
443 return match;
444
93c2fb25
DA
445 list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
446 fib6_siblings) {
5e670d84
DA
447 int nh_upper_bound;
448
449 nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
450 if (fl6->mp_hash > nh_upper_bound)
3d709f69
IS
451 continue;
452 if (rt6_score_route(sibling, oif, strict) < 0)
453 break;
454 match = sibling;
455 break;
456 }
457
51ebd318
ND
458 return match;
459}
460
1da177e4 461/*
66f5d6ce 462 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
463 */
464
8d1c802b
DA
465static inline struct fib6_info *rt6_device_match(struct net *net,
466 struct fib6_info *rt,
b71d1d42 467 const struct in6_addr *saddr,
1da177e4 468 int oif,
d420895e 469 int flags)
1da177e4 470{
8d1c802b 471 struct fib6_info *sprt;
1da177e4 472
5e670d84
DA
473 if (!oif && ipv6_addr_any(saddr) &&
474 !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
8067bb8c 475 return rt;
dd3abc4e 476
8fb11a9a 477 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
5e670d84 478 const struct net_device *dev = sprt->fib6_nh.nh_dev;
dd3abc4e 479
5e670d84 480 if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
481 continue;
482
dd3abc4e 483 if (oif) {
1da177e4
LT
484 if (dev->ifindex == oif)
485 return sprt;
dd3abc4e
YH
486 } else {
487 if (ipv6_chk_addr(net, saddr, dev,
488 flags & RT6_LOOKUP_F_IFACE))
489 return sprt;
1da177e4 490 }
dd3abc4e 491 }
1da177e4 492
eea68cd3
DA
493 if (oif && flags & RT6_LOOKUP_F_IFACE)
494 return net->ipv6.fib6_null_entry;
8067bb8c 495
421842ed 496 return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
1da177e4
LT
497}
498
27097255 499#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
500struct __rt6_probe_work {
501 struct work_struct work;
502 struct in6_addr target;
503 struct net_device *dev;
504};
505
506static void rt6_probe_deferred(struct work_struct *w)
507{
508 struct in6_addr mcaddr;
509 struct __rt6_probe_work *work =
510 container_of(w, struct __rt6_probe_work, work);
511
512 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 513 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 514 dev_put(work->dev);
662f5533 515 kfree(work);
c2f17e82
HFS
516}
517
8d1c802b 518static void rt6_probe(struct fib6_info *rt)
27097255 519{
f547fac6 520 struct __rt6_probe_work *work = NULL;
5e670d84 521 const struct in6_addr *nh_gw;
f2c31e32 522 struct neighbour *neigh;
5e670d84 523 struct net_device *dev;
f547fac6 524 struct inet6_dev *idev;
5e670d84 525
27097255
YH
526 /*
527 * Okay, this does not seem to be appropriate
528 * for now, however, we need to check if it
529 * is really so; aka Router Reachability Probing.
530 *
531 * Router Reachability Probe MUST be rate-limited
532 * to no more than one per minute.
533 */
93c2fb25 534 if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
7ff74a59 535 return;
5e670d84
DA
536
537 nh_gw = &rt->fib6_nh.nh_gw;
538 dev = rt->fib6_nh.nh_dev;
2152caea 539 rcu_read_lock_bh();
f547fac6 540 idev = __in6_dev_get(dev);
5e670d84 541 neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
2152caea 542 if (neigh) {
8d6c31bf
MKL
543 if (neigh->nud_state & NUD_VALID)
544 goto out;
545
2152caea 546 write_lock(&neigh->lock);
990edb42
MKL
547 if (!(neigh->nud_state & NUD_VALID) &&
548 time_after(jiffies,
dcd1f572 549 neigh->updated + idev->cnf.rtr_probe_interval)) {
990edb42
MKL
550 work = kmalloc(sizeof(*work), GFP_ATOMIC);
551 if (work)
552 __neigh_set_probe_once(neigh);
c2f17e82 553 }
2152caea 554 write_unlock(&neigh->lock);
f547fac6
SD
555 } else if (time_after(jiffies, rt->last_probe +
556 idev->cnf.rtr_probe_interval)) {
990edb42 557 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 558 }
990edb42
MKL
559
560 if (work) {
f547fac6 561 rt->last_probe = jiffies;
990edb42 562 INIT_WORK(&work->work, rt6_probe_deferred);
5e670d84
DA
563 work->target = *nh_gw;
564 dev_hold(dev);
565 work->dev = dev;
990edb42
MKL
566 schedule_work(&work->work);
567 }
568
8d6c31bf 569out:
2152caea 570 rcu_read_unlock_bh();
27097255
YH
571}
572#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct fib6_info *rt)
{
	/* no-op */
}
576#endif
577
1da177e4 578/*
554cfb7e 579 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 580 */
8d1c802b 581static inline int rt6_check_dev(struct fib6_info *rt, int oif)
554cfb7e 582{
5e670d84
DA
583 const struct net_device *dev = rt->fib6_nh.nh_dev;
584
161980f4 585 if (!oif || dev->ifindex == oif)
554cfb7e 586 return 2;
161980f4 587 return 0;
554cfb7e 588}
1da177e4 589
8d1c802b 590static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
1da177e4 591{
afc154e9 592 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
5e670d84 593 struct neighbour *neigh;
f2c31e32 594
93c2fb25
DA
595 if (rt->fib6_flags & RTF_NONEXTHOP ||
596 !(rt->fib6_flags & RTF_GATEWAY))
afc154e9 597 return RT6_NUD_SUCCEED;
145a3621
YH
598
599 rcu_read_lock_bh();
5e670d84
DA
600 neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
601 &rt->fib6_nh.nh_gw);
145a3621
YH
602 if (neigh) {
603 read_lock(&neigh->lock);
554cfb7e 604 if (neigh->nud_state & NUD_VALID)
afc154e9 605 ret = RT6_NUD_SUCCEED;
398bcbeb 606#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 607 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 608 ret = RT6_NUD_SUCCEED;
7e980569
JB
609 else
610 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 611#endif
145a3621 612 read_unlock(&neigh->lock);
afc154e9
HFS
613 } else {
614 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 615 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 616 }
145a3621
YH
617 rcu_read_unlock_bh();
618
a5a81f0b 619 return ret;
1da177e4
LT
620}
621
8d1c802b 622static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
1da177e4 623{
a5a81f0b 624 int m;
1ab1457c 625
4d0c5911 626 m = rt6_check_dev(rt, oif);
77d16f45 627 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 628 return RT6_NUD_FAIL_HARD;
ebacaaa0 629#ifdef CONFIG_IPV6_ROUTER_PREF
93c2fb25 630 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
ebacaaa0 631#endif
afc154e9
HFS
632 if (strict & RT6_LOOKUP_F_REACHABLE) {
633 int n = rt6_check_neigh(rt);
634 if (n < 0)
635 return n;
636 }
554cfb7e
YH
637 return m;
638}
639
dcd1f572
DA
640/* called with rc_read_lock held */
641static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i)
642{
643 const struct net_device *dev = fib6_info_nh_dev(f6i);
644 bool rc = false;
645
646 if (dev) {
647 const struct inet6_dev *idev = __in6_dev_get(dev);
648
649 rc = !!idev->cnf.ignore_routes_with_linkdown;
650 }
651
652 return rc;
653}
654
8d1c802b
DA
655static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
656 int *mpri, struct fib6_info *match,
afc154e9 657 bool *do_rr)
554cfb7e 658{
f11e6659 659 int m;
afc154e9 660 bool match_do_rr = false;
35103d11 661
5e670d84 662 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
663 goto out;
664
dcd1f572 665 if (fib6_ignore_linkdown(rt) &&
5e670d84 666 rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 667 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 668 goto out;
f11e6659 669
14895687 670 if (fib6_check_expired(rt))
f11e6659
DM
671 goto out;
672
673 m = rt6_score_route(rt, oif, strict);
7e980569 674 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
675 match_do_rr = true;
676 m = 0; /* lowest valid score */
7e980569 677 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 678 goto out;
afc154e9
HFS
679 }
680
681 if (strict & RT6_LOOKUP_F_REACHABLE)
682 rt6_probe(rt);
f11e6659 683
7e980569 684 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 685 if (m > *mpri) {
afc154e9 686 *do_rr = match_do_rr;
f11e6659
DM
687 *mpri = m;
688 match = rt;
f11e6659 689 }
f11e6659
DM
690out:
691 return match;
692}
693
8d1c802b
DA
694static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
695 struct fib6_info *leaf,
696 struct fib6_info *rr_head,
afc154e9
HFS
697 u32 metric, int oif, int strict,
698 bool *do_rr)
f11e6659 699{
8d1c802b 700 struct fib6_info *rt, *match, *cont;
554cfb7e 701 int mpri = -1;
1da177e4 702
f11e6659 703 match = NULL;
9fbdcfaf 704 cont = NULL;
8fb11a9a 705 for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
93c2fb25 706 if (rt->fib6_metric != metric) {
9fbdcfaf
SK
707 cont = rt;
708 break;
709 }
710
711 match = find_match(rt, oif, strict, &mpri, match, do_rr);
712 }
713
66f5d6ce 714 for (rt = leaf; rt && rt != rr_head;
8fb11a9a 715 rt = rcu_dereference(rt->fib6_next)) {
93c2fb25 716 if (rt->fib6_metric != metric) {
9fbdcfaf
SK
717 cont = rt;
718 break;
719 }
720
afc154e9 721 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
722 }
723
724 if (match || !cont)
725 return match;
726
8fb11a9a 727 for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
afc154e9 728 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 729
f11e6659
DM
730 return match;
731}
1da177e4 732
8d1c802b 733static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
8d1040e8 734 int oif, int strict)
f11e6659 735{
8d1c802b
DA
736 struct fib6_info *leaf = rcu_dereference(fn->leaf);
737 struct fib6_info *match, *rt0;
afc154e9 738 bool do_rr = false;
17ecf590 739 int key_plen;
1da177e4 740
421842ed
DA
741 if (!leaf || leaf == net->ipv6.fib6_null_entry)
742 return net->ipv6.fib6_null_entry;
8d1040e8 743
66f5d6ce 744 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 745 if (!rt0)
66f5d6ce 746 rt0 = leaf;
1da177e4 747
17ecf590
WW
748 /* Double check to make sure fn is not an intermediate node
749 * and fn->leaf does not points to its child's leaf
750 * (This might happen if all routes under fn are deleted from
751 * the tree and fib6_repair_tree() is called on the node.)
752 */
93c2fb25 753 key_plen = rt0->fib6_dst.plen;
17ecf590 754#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
755 if (rt0->fib6_src.plen)
756 key_plen = rt0->fib6_src.plen;
17ecf590
WW
757#endif
758 if (fn->fn_bit != key_plen)
421842ed 759 return net->ipv6.fib6_null_entry;
17ecf590 760
93c2fb25 761 match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
afc154e9 762 &do_rr);
1da177e4 763
afc154e9 764 if (do_rr) {
8fb11a9a 765 struct fib6_info *next = rcu_dereference(rt0->fib6_next);
f11e6659 766
554cfb7e 767 /* no entries matched; do round-robin */
93c2fb25 768 if (!next || next->fib6_metric != rt0->fib6_metric)
8d1040e8 769 next = leaf;
f11e6659 770
66f5d6ce 771 if (next != rt0) {
93c2fb25 772 spin_lock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 773 /* make sure next is not being deleted from the tree */
93c2fb25 774 if (next->fib6_node)
66f5d6ce 775 rcu_assign_pointer(fn->rr_ptr, next);
93c2fb25 776 spin_unlock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 777 }
1da177e4 778 }
1da177e4 779
421842ed 780 return match ? match : net->ipv6.fib6_null_entry;
1da177e4
LT
781}
782
8d1c802b 783static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
8b9df265 784{
93c2fb25 785 return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
8b9df265
MKL
786}
787
70ceb4f5
YH
788#ifdef CONFIG_IPV6_ROUTE_INFO
789int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 790 const struct in6_addr *gwaddr)
70ceb4f5 791{
c346dca1 792 struct net *net = dev_net(dev);
70ceb4f5
YH
793 struct route_info *rinfo = (struct route_info *) opt;
794 struct in6_addr prefix_buf, *prefix;
795 unsigned int pref;
4bed72e4 796 unsigned long lifetime;
8d1c802b 797 struct fib6_info *rt;
70ceb4f5
YH
798
799 if (len < sizeof(struct route_info)) {
800 return -EINVAL;
801 }
802
803 /* Sanity check for prefix_len and length */
804 if (rinfo->length > 3) {
805 return -EINVAL;
806 } else if (rinfo->prefix_len > 128) {
807 return -EINVAL;
808 } else if (rinfo->prefix_len > 64) {
809 if (rinfo->length < 2) {
810 return -EINVAL;
811 }
812 } else if (rinfo->prefix_len > 0) {
813 if (rinfo->length < 1) {
814 return -EINVAL;
815 }
816 }
817
818 pref = rinfo->route_pref;
819 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 820 return -EINVAL;
70ceb4f5 821
4bed72e4 822 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
823
824 if (rinfo->length == 3)
825 prefix = (struct in6_addr *)rinfo->prefix;
826 else {
827 /* this function is safe */
828 ipv6_addr_prefix(&prefix_buf,
829 (struct in6_addr *)rinfo->prefix,
830 rinfo->prefix_len);
831 prefix = &prefix_buf;
832 }
833
f104a567 834 if (rinfo->prefix_len == 0)
afb1d4b5 835 rt = rt6_get_dflt_router(net, gwaddr, dev);
f104a567
DJ
836 else
837 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 838 gwaddr, dev);
70ceb4f5
YH
839
840 if (rt && !lifetime) {
afb1d4b5 841 ip6_del_rt(net, rt);
70ceb4f5
YH
842 rt = NULL;
843 }
844
845 if (!rt && lifetime)
830218c1
DA
846 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
847 dev, pref);
70ceb4f5 848 else if (rt)
93c2fb25
DA
849 rt->fib6_flags = RTF_ROUTEINFO |
850 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
70ceb4f5
YH
851
852 if (rt) {
1716a961 853 if (!addrconf_finite_timeout(lifetime))
14895687 854 fib6_clean_expires(rt);
1716a961 855 else
14895687 856 fib6_set_expires(rt, jiffies + HZ * lifetime);
1716a961 857
93531c67 858 fib6_info_release(rt);
70ceb4f5
YH
859 }
860 return 0;
861}
862#endif
863
ae90d867
DA
864/*
865 * Misc support functions
866 */
867
868/* called with rcu_lock held */
8d1c802b 869static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
ae90d867 870{
5e670d84 871 struct net_device *dev = rt->fib6_nh.nh_dev;
ae90d867 872
93c2fb25 873 if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
ae90d867
DA
874 /* for copies of local routes, dst->dev needs to be the
875 * device if it is a master device, the master device if
876 * device is enslaved, and the loopback as the default
877 */
878 if (netif_is_l3_slave(dev) &&
93c2fb25 879 !rt6_need_strict(&rt->fib6_dst.addr))
ae90d867
DA
880 dev = l3mdev_master_dev_rcu(dev);
881 else if (!netif_is_l3_master(dev))
882 dev = dev_net(dev)->loopback_dev;
883 /* last case is netif_is_l3_master(dev) is true in which
884 * case we want dev returned to be dev
885 */
886 }
887
888 return dev;
889}
890
6edb3c96
DA
891static const int fib6_prop[RTN_MAX + 1] = {
892 [RTN_UNSPEC] = 0,
893 [RTN_UNICAST] = 0,
894 [RTN_LOCAL] = 0,
895 [RTN_BROADCAST] = 0,
896 [RTN_ANYCAST] = 0,
897 [RTN_MULTICAST] = 0,
898 [RTN_BLACKHOLE] = -EINVAL,
899 [RTN_UNREACHABLE] = -EHOSTUNREACH,
900 [RTN_PROHIBIT] = -EACCES,
901 [RTN_THROW] = -EAGAIN,
902 [RTN_NAT] = -EINVAL,
903 [RTN_XRESOLVE] = -EINVAL,
904};
905
906static int ip6_rt_type_to_error(u8 fib6_type)
907{
908 return fib6_prop[fib6_type];
909}
910
8d1c802b 911static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
3b6761d1
DA
912{
913 unsigned short flags = 0;
914
915 if (rt->dst_nocount)
916 flags |= DST_NOCOUNT;
917 if (rt->dst_nopolicy)
918 flags |= DST_NOPOLICY;
919 if (rt->dst_host)
920 flags |= DST_HOST;
921
922 return flags;
923}
924
8d1c802b 925static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96
DA
926{
927 rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
928
929 switch (ort->fib6_type) {
930 case RTN_BLACKHOLE:
931 rt->dst.output = dst_discard_out;
932 rt->dst.input = dst_discard;
933 break;
934 case RTN_PROHIBIT:
935 rt->dst.output = ip6_pkt_prohibit_out;
936 rt->dst.input = ip6_pkt_prohibit;
937 break;
938 case RTN_THROW:
939 case RTN_UNREACHABLE:
940 default:
941 rt->dst.output = ip6_pkt_discard_out;
942 rt->dst.input = ip6_pkt_discard;
943 break;
944 }
945}
946
8d1c802b 947static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96 948{
93c2fb25 949 if (ort->fib6_flags & RTF_REJECT) {
6edb3c96
DA
950 ip6_rt_init_dst_reject(rt, ort);
951 return;
952 }
953
954 rt->dst.error = 0;
955 rt->dst.output = ip6_output;
956
d23c4b63 957 if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) {
6edb3c96 958 rt->dst.input = ip6_input;
93c2fb25 959 } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
6edb3c96
DA
960 rt->dst.input = ip6_mc_input;
961 } else {
962 rt->dst.input = ip6_forward;
963 }
964
965 if (ort->fib6_nh.nh_lwtstate) {
966 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
967 lwtunnel_set_redirect(&rt->dst);
968 }
969
970 rt->dst.lastuse = jiffies;
971}
972
e873e4b9 973/* Caller must already hold reference to @from */
8d1c802b 974static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
ae90d867 975{
ae90d867 976 rt->rt6i_flags &= ~RTF_EXPIRES;
a68886a6 977 rcu_assign_pointer(rt->from, from);
e1255ed4 978 ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
ae90d867
DA
979}
980
/* Populate the freshly allocated @rt from the FIB entry @ort: dst handlers,
 * destination prefix, inet6 device, gateway, flags, the back pointer to
 * @ort and (with CONFIG_IPV6_SUBTREES) the source prefix.
 */
e873e4b9 981/* Caller must already hold reference to @ort */
8d1c802b 982static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
ae90d867 983{
dcd1f572
DA
984 struct net_device *dev = fib6_info_nh_dev(ort);
985
6edb3c96
DA
986 ip6_rt_init_dst(rt, ort);
987
93c2fb25 988 rt->rt6i_dst = ort->fib6_dst;
/* rt6i_idev stays NULL when the nexthop has no device */
dcd1f572 989 rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
5e670d84 990 rt->rt6i_gateway = ort->fib6_nh.nh_gw;
/* Flags are copied first; rt6_set_from() then clears RTF_EXPIRES on the
 * copy, so the order of these two statements matters.
 */
93c2fb25 991 rt->rt6i_flags = ort->fib6_flags;
ae90d867 992 rt6_set_from(rt, ort);
ae90d867 993#ifdef CONFIG_IPV6_SUBTREES
93c2fb25 994 rt->rt6i_src = ort->fib6_src;
ae90d867 995#endif
ae90d867
DA
996}
997
/* Walk from @fn towards the root of the tree and return the first node that
 * has RTN_RTINFO set, or NULL when the current node carries RTN_TL_ROOT.
 *
 * At each step the parent is examined: if it has a source subtree that is
 * not the node we came from, the walk continues with the result of
 * fib6_node_lookup() in that subtree keyed by @saddr; otherwise it
 * continues with the parent itself.
 *
 * fn->parent is read with rcu_dereference(); the callers visible in this
 * file run under rcu_read_lock().
 */
a3c00e46
MKL
998static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
999 struct in6_addr *saddr)
1000{
66f5d6ce 1001 struct fib6_node *pn, *sn;
a3c00e46
MKL
1002 while (1) {
1003 if (fn->fn_flags & RTN_TL_ROOT)
1004 return NULL;
66f5d6ce
WW
1005 pn = rcu_dereference(fn->parent);
1006 sn = FIB6_SUBTREE(pn);
/* sn == fn means we are leaving that subtree, so do not re-enter it */
1007 if (sn && sn != fn)
6454743b 1008 fn = fib6_node_lookup(sn, NULL, saddr);
a3c00e46
MKL
1009 else
1010 fn = pn;
1011 if (fn->fn_flags & RTN_RTINFO)
1012 return fn;
1013 }
1014}
c71099ac 1015
/* Try to take a reference on the route *@prt.
 *
 * Returns true when dst_hold_safe() succeeded; *@prt is left untouched.
 * Returns false otherwise, and *@prt is replaced: with
 * net->ipv6.ip6_null_entry (reference taken) when @null_fallback is true,
 * or with NULL when it is false.
 *
 * @net is only dereferenced on the @null_fallback path, so callers passing
 * null_fallback == false may pass a NULL @net.
 */
d3843fe5
WW
1016static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
1017 bool null_fallback)
1018{
1019 struct rt6_info *rt = *prt;
1020
1021 if (dst_hold_safe(&rt->dst))
1022 return true;
1023 if (null_fallback) {
1024 rt = net->ipv6.ip6_null_entry;
1025 dst_hold(&rt->dst);
1026 } else {
1027 rt = NULL;
1028 }
1029 *prt = rt;
1030 return false;
1031}
1032
/* Allocate a new rt6_info for the FIB entry @rt and initialise it with
 * ip6_rt_copy_init().
 *
 * A reference on @rt is taken first with fib6_info_hold_safe(); it is
 * dropped again if the dst allocation fails.
 *
 * Returns the new route, or NULL if the reference could not be taken or the
 * allocation failed.
 */
dec9b0e2 1033/* called with rcu_lock held */
8d1c802b 1034static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
dec9b0e2 1035{
3b6761d1 1036 unsigned short flags = fib6_info_dst_flags(rt);
dec9b0e2
DA
1037 struct net_device *dev = rt->fib6_nh.nh_dev;
1038 struct rt6_info *nrt;
1039
e873e4b9
WW
1040 if (!fib6_info_hold_safe(rt))
1041 return NULL;
1042
93531c67 1043 nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
dec9b0e2
DA
1044 if (nrt)
1045 ip6_rt_copy_init(nrt, rt);
e873e4b9
WW
1046 else
1047 fib6_info_release(rt);
dec9b0e2
DA
1048
1049 return nrt;
1050}
1051
/* Per-table lookup callback handed to fib6_rule_lookup() by
 * ip6_route_lookup() and rt6_lookup().
 *
 * Looks up @fl6 in @table under rcu_read_lock(), backtracking through the
 * tree while only the null entry matches. The result is, in order of
 * preference: a cached exception route for (daddr, saddr), a newly
 * allocated copy of the matched FIB entry, or net->ipv6.ip6_null_entry.
 * The function never returns NULL.
 */
8ed67789
DL
1052static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1053 struct fib6_table *table,
b75cc8f9
DA
1054 struct flowi6 *fl6,
1055 const struct sk_buff *skb,
1056 int flags)
1da177e4 1057{
8d1c802b 1058 struct fib6_info *f6i;
1da177e4 1059 struct fib6_node *fn;
23fb93a4 1060 struct rt6_info *rt;
1da177e4 1061
/* FLOWI_FLAG_SKIP_NH_OIF turns off strict interface matching */
b6cdbc85
DA
1062 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1063 flags &= ~RT6_LOOKUP_F_IFACE;
1064
66f5d6ce 1065 rcu_read_lock();
6454743b 1066 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 1067restart:
23fb93a4
DA
1068 f6i = rcu_dereference(fn->leaf);
1069 if (!f6i) {
1070 f6i = net->ipv6.fib6_null_entry;
66f5d6ce 1071 } else {
23fb93a4 1072 f6i = rt6_device_match(net, f6i, &fl6->saddr,
66f5d6ce 1073 fl6->flowi6_oif, flags);
/* multipath selection is only done when no output interface was given */
93c2fb25 1074 if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
3b290a31
DA
1075 f6i = fib6_multipath_select(net, f6i, fl6,
1076 fl6->flowi6_oif, skb,
1077 flags);
66f5d6ce 1078 }
/* Nothing usable in this node: retry from the next candidate node,
 * or fall through with the null entry when backtracking is exhausted.
 */
23fb93a4 1079 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1080 fn = fib6_backtrack(fn, &fl6->saddr);
1081 if (fn)
1082 goto restart;
1083 }
2b760fcf 1084
d4bea421 1085 trace_fib6_table_lookup(net, f6i, table, fl6);
d3843fe5 1086
2b760fcf 1087 /* Search through exception table */
23fb93a4
DA
1088 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1089 if (rt) {
/* if the hold fails, ip6_hold_safe() swaps in the null entry */
dec9b0e2
DA
1090 if (ip6_hold_safe(net, &rt, true))
1091 dst_use_noref(&rt->dst, jiffies);
23fb93a4 1092 } else if (f6i == net->ipv6.fib6_null_entry) {
dec9b0e2
DA
1093 rt = net->ipv6.ip6_null_entry;
1094 dst_hold(&rt->dst);
23fb93a4
DA
1095 } else {
1096 rt = ip6_create_rt_rcu(f6i);
/* allocation failure degrades to the null entry */
1097 if (!rt) {
1098 rt = net->ipv6.ip6_null_entry;
1099 dst_hold(&rt->dst);
1100 }
dec9b0e2 1101 }
b811580d 1102
66f5d6ce 1103 rcu_read_unlock();
b811580d 1104
c71099ac 1105 return rt;
c71099ac
TG
1106}
1107
/* Exported route lookup: runs the policy-rule lookup for @fl6 with
 * ip6_pol_route_lookup() as the per-table callback and returns whatever
 * fib6_rule_lookup() returns.
 */
67ba4152 1108struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 1109 const struct sk_buff *skb, int flags)
ea6e574e 1110{
b75cc8f9 1111 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
ea6e574e
FW
1112}
1113EXPORT_SYMBOL_GPL(ip6_route_lookup);
1114
/* Convenience lookup by address.
 *
 * Builds a flow from @daddr, @oif and (optionally) @saddr and runs the
 * policy lookup. A non-zero @strict requests RT6_LOOKUP_F_IFACE; a non-NULL
 * @saddr adds RT6_LOOKUP_F_HAS_SADDR.
 *
 * Returns the route when the resulting dst has no error. Otherwise the dst
 * is released and NULL is returned.
 */
9acd9f3a 1115struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
b75cc8f9
DA
1116 const struct in6_addr *saddr, int oif,
1117 const struct sk_buff *skb, int strict)
c71099ac 1118{
4c9483b2
DM
1119 struct flowi6 fl6 = {
1120 .flowi6_oif = oif,
1121 .daddr = *daddr,
c71099ac
TG
1122 };
1123 struct dst_entry *dst;
77d16f45 1124 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 1125
adaa70bb 1126 if (saddr) {
4c9483b2 1127 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
1128 flags |= RT6_LOOKUP_F_HAS_SADDR;
1129 }
1130
b75cc8f9 1131 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
c71099ac
TG
1132 if (dst->error == 0)
1133 return (struct rt6_info *) dst;
1134
/* error route (e.g. the null entry): drop it and report "not found" */
1135 dst_release(dst);
1136
1da177e4
LT
1137 return NULL;
1138}
7159039a
YH
1139EXPORT_SYMBOL(rt6_lookup);
1140
c71099ac 1141/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
1142 * It takes new route entry, the addition fails by any reason the
1143 * route is released.
1144 * Caller must hold dst before calling it.
1da177e4
LT
1145 */
1146
/* Insert @rt into the table it names (rt->fib6_table).
 *
 * Takes table->tb6_lock (BH-disabling) around fib6_add() and returns
 * fib6_add()'s result. @info and @extack are passed through unchanged.
 */
8d1c802b 1147static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
333c4301 1148 struct netlink_ext_ack *extack)
1da177e4
LT
1149{
1150 int err;
c71099ac 1151 struct fib6_table *table;
1da177e4 1152
93c2fb25 1153 table = rt->fib6_table;
66f5d6ce 1154 spin_lock_bh(&table->tb6_lock);
d4ead6b3 1155 err = fib6_add(&table->tb6_root, rt, info, extack);
66f5d6ce 1156 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1157
1158 return err;
1159}
1160
/* Insert @rt on behalf of namespace @net: wraps __ip6_ins_rt() with an
 * nl_info that only carries the namespace and with no extack.
 * Returns the result of __ip6_ins_rt().
 */
8d1c802b 1161int ip6_ins_rt(struct net *net, struct fib6_info *rt)
40e22e8f 1162{
afb1d4b5 1163 struct nl_info info = { .nl_net = net, };
e715b6d3 1164
d4ead6b3 1165 return __ip6_ins_rt(rt, &info, NULL);
40e22e8f
TG
1166}
1167
/* Create an RTF_CACHE host-route clone of the FIB entry @ort for @daddr.
 *
 * A reference on @ort is taken with fib6_info_hold_safe() and released
 * again if the dst allocation fails. The clone is marked RTF_CACHE and
 * DST_HOST, and its destination is narrowed to @daddr/128.
 *
 * Returns the clone, or NULL if the reference or the allocation failed.
 *
 * NOTE(review): the device is obtained through ip6_rt_get_dev_rcu(), so this
 * presumably must run under rcu_read_lock(); the caller visible in this
 * file (ip6_pol_route) does hold it.
 */
8d1c802b 1168static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
8b9df265
MKL
1169 const struct in6_addr *daddr,
1170 const struct in6_addr *saddr)
1da177e4 1171{
4832c30d 1172 struct net_device *dev;
1da177e4
LT
1173 struct rt6_info *rt;
1174
1175 /*
1176 * Clone the route.
1177 */
1178
e873e4b9
WW
1179 if (!fib6_info_hold_safe(ort))
1180 return NULL;
1181
4832c30d 1182 dev = ip6_rt_get_dev_rcu(ort);
93531c67 1183 rt = ip6_dst_alloc(dev_net(dev), dev, 0);
e873e4b9
WW
1184 if (!rt) {
1185 fib6_info_release(ort);
83a09abd 1186 return NULL;
e873e4b9 1187 }
83a09abd
MKL
1188
1189 ip6_rt_copy_init(rt, ort);
1190 rt->rt6i_flags |= RTF_CACHE;
83a09abd
MKL
1191 rt->dst.flags |= DST_HOST;
1192 rt->rt6i_dst.addr = *daddr;
1193 rt->rt6i_dst.plen = 128;
1da177e4 1194
83a09abd 1195 if (!rt6_is_gw_or_nonexthop(ort)) {
/* @daddr equal to the address of a non-host prefix is flagged anycast */
93c2fb25
DA
1196 if (ort->fib6_dst.plen != 128 &&
1197 ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
83a09abd 1198 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1199#ifdef CONFIG_IPV6_SUBTREES
/* source-routed entry: pin the clone to the exact source as well */
83a09abd
MKL
1200 if (rt->rt6i_src.plen && saddr) {
1201 rt->rt6i_src.addr = *saddr;
1202 rt->rt6i_src.plen = 128;
8b9df265 1203 }
83a09abd 1204#endif
95a9a5ba 1205 }
1da177e4 1206
95a9a5ba
YH
1207 return rt;
1208}
1da177e4 1209
/* Allocate a per-cpu copy of the FIB entry @rt.
 *
 * Takes a reference on @rt (dropped again on allocation failure), allocates
 * the dst under rcu_read_lock() because the device is fetched with
 * ip6_rt_get_dev_rcu(), initialises it from @rt and marks it RTF_PCPU.
 *
 * Returns the new route or NULL on failure. The route is not installed in
 * any per-cpu slot here.
 */
8d1c802b 1210static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
d52d3997 1211{
3b6761d1 1212 unsigned short flags = fib6_info_dst_flags(rt);
4832c30d 1213 struct net_device *dev;
d52d3997
MKL
1214 struct rt6_info *pcpu_rt;
1215
e873e4b9
WW
1216 if (!fib6_info_hold_safe(rt))
1217 return NULL;
1218
4832c30d
DA
1219 rcu_read_lock();
1220 dev = ip6_rt_get_dev_rcu(rt);
93531c67 1221 pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
4832c30d 1222 rcu_read_unlock();
e873e4b9
WW
1223 if (!pcpu_rt) {
1224 fib6_info_release(rt);
d52d3997 1225 return NULL;
e873e4b9 1226 }
d52d3997 1227 ip6_rt_copy_init(pcpu_rt, rt);
d52d3997
MKL
1228 pcpu_rt->rt6i_flags |= RTF_PCPU;
1229 return pcpu_rt;
1230}
1231
/* Return this CPU's cached copy of the FIB entry @rt with a reference
 * taken, or NULL when the slot is empty or the reference could not be
 * obtained (ip6_hold_safe() with null_fallback == false resets the pointer
 * to NULL in that case).
 */
66f5d6ce 1232/* It should be called with rcu_read_lock() acquired */
8d1c802b 1233static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
d52d3997 1234{
a73e4195 1235 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1236
1237 p = this_cpu_ptr(rt->rt6i_pcpu);
1238 pcpu_rt = *p;
1239
d4ead6b3
DA
1240 if (pcpu_rt)
1241 ip6_hold_safe(NULL, &pcpu_rt, false);
d3843fe5
1242
a73e4195
MKL
1243 return pcpu_rt;
1244}
1245
/* Allocate a per-cpu copy of @rt and install it in this CPU's slot.
 *
 * On allocation failure the namespace's ip6_null_entry is returned with a
 * reference held. On success an extra reference is taken on the new route
 * before it is stored, and the route is returned to the caller.
 *
 * The slot is expected to be empty: cmpxchg() against NULL must succeed,
 * otherwise BUG_ON() fires. The caller visible in this file (ip6_pol_route)
 * only calls this after rt6_get_pcpu_route() returned NULL and with bottom
 * halves disabled around both calls.
 */
afb1d4b5 1246static struct rt6_info *rt6_make_pcpu_route(struct net *net,
8d1c802b 1247 struct fib6_info *rt)
a73e4195
MKL
1248{
1249 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1250
1251 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1252 if (!pcpu_rt) {
9c7370a1
MKL
1253 dst_hold(&net->ipv6.ip6_null_entry->dst);
1254 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1255 }
1256
a94b9367
WW
1257 dst_hold(&pcpu_rt->dst);
1258 p = this_cpu_ptr(rt->rt6i_pcpu);
1259 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1260 BUG_ON(prev);
a94b9367 1261
d52d3997
MKL
1262 return pcpu_rt;
1263}
1264
35732d01
WW
1265/* exception hash table implementation
1266 */
/* Single spinlock serialising every modification of the per-fib6_info
 * exception buckets in this file (insert, remove, flush, age, pmtu update).
 */
1267static DEFINE_SPINLOCK(rt6_exception_lock);
1268
1269/* Remove rt6_ex from hash table and free the memory
1270 * Caller must hold rt6_exception_lock
1271 */
/* A NULL @bucket or @rt6_ex makes this a no-op. Otherwise the function
 * decrements the namespace's fib_rt_cache counter, detaches the cached
 * route from its parent FIB entry (rt->from is cleared and the reference
 * released), puts the dst's device, unlinks the entry from the chain,
 * drops the table's reference on the dst, frees the entry after an RCU
 * grace period and decrements the bucket depth.
 */
1272static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1273 struct rt6_exception *rt6_ex)
1274{
65cafef4 1275 struct fib6_info *from;
b2427e67 1276 struct net *net;
81eb8447 1277
35732d01
WW
1278 if (!bucket || !rt6_ex)
1279 return;
b2427e67
CIK
1280
1281 net = dev_net(rt6_ex->rt6i->dst.dev);
65cafef4
PA
1282 net->ipv6.rt6_stats->fib_rt_cache--;
1283
1284 /* purge completely the exception to allow releasing the held resources:
1285 * some [sk] cache may keep the dst around for unlimited time
1286 */
1287 from = rcu_dereference_protected(rt6_ex->rt6i->from,
1288 lockdep_is_held(&rt6_exception_lock));
1289 rcu_assign_pointer(rt6_ex->rt6i->from, NULL);
1290 fib6_info_release(from);
1291 dst_dev_put(&rt6_ex->rt6i->dst);
1292
35732d01 1293 hlist_del_rcu(&rt6_ex->hlist);
77634cc6 1294 dst_release(&rt6_ex->rt6i->dst);
/* readers may still be walking the chain, hence the deferred free */
35732d01
WW
1295 kfree_rcu(rt6_ex, rcu);
1296 WARN_ON_ONCE(!bucket->depth);
1297 bucket->depth--;
1298}
1299
1300/* Remove oldest rt6_ex in bucket and free the memory
1301 * Caller must hold rt6_exception_lock
1302 */
/* "Oldest" is the entry with the earliest stamp according to time_before().
 * An empty chain leaves oldest == NULL, which rt6_remove_exception()
 * treats as a no-op.
 */
1303static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1304{
1305 struct rt6_exception *rt6_ex, *oldest = NULL;
1306
1307 if (!bucket)
1308 return;
1309
1310 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1311 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1312 oldest = rt6_ex;
1313 }
1314 rt6_remove_exception(bucket, oldest);
1315}
1316
/* Compute the exception-table bucket index for a (@dst, @src) pair.
 *
 * @dst is hashed with jhash() using a seed that is filled in once via
 * net_get_random_once(). With CONFIG_IPV6_SUBTREES and a non-NULL @src the
 * source address is hashed on top of that value; without the config option
 * @src is ignored. The result is reduced to
 * FIB6_EXCEPTION_BUCKET_SIZE_SHIFT bits with hash_32().
 */
1317static u32 rt6_exception_hash(const struct in6_addr *dst,
1318 const struct in6_addr *src)
1319{
1320 static u32 seed __read_mostly;
1321 u32 val;
1322
1323 net_get_random_once(&seed, sizeof(seed));
1324 val = jhash(dst, sizeof(*dst), seed);
1325
1326#ifdef CONFIG_IPV6_SUBTREES
1327 if (src)
1328 val = jhash(src, sizeof(*src), val);
1329#endif
1330 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1331}
1332
1333/* Helper function to find the cached rt in the hash table
1334 * and update bucket pointer to point to the bucket for this
1335 * (daddr, saddr) pair
1336 * Caller must hold rt6_exception_lock
1337 */
/* On entry *@bucket points at the start of the bucket array; it is advanced
 * in place to the hashed bucket whether or not a match is found, except on
 * the early NULL return (NULL *@bucket or NULL @daddr), where it is left
 * alone. With CONFIG_IPV6_SUBTREES a non-NULL @saddr must match as well.
 * Returns the matching entry or NULL.
 */
1338static struct rt6_exception *
1339__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1340 const struct in6_addr *daddr,
1341 const struct in6_addr *saddr)
1342{
1343 struct rt6_exception *rt6_ex;
1344 u32 hval;
1345
1346 if (!(*bucket) || !daddr)
1347 return NULL;
1348
1349 hval = rt6_exception_hash(daddr, saddr);
1350 *bucket += hval;
1351
1352 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1353 struct rt6_info *rt6 = rt6_ex->rt6i;
1354 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1355
1356#ifdef CONFIG_IPV6_SUBTREES
1357 if (matched && saddr)
1358 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1359#endif
1360 if (matched)
1361 return rt6_ex;
1362 }
1363 return NULL;
1364}
1365
1366/* Helper function to find the cached rt in the hash table
1367 * and update bucket pointer to point to the bucket for this
1368 * (daddr, saddr) pair
1369 * Caller must hold rcu_read_lock()
1370 */
/* Lock-free twin of __rt6_find_exception_spinlock(): same matching rules
 * and the same in-place advance of *@bucket, but the chain is walked with
 * the RCU list iterator and a one-time warning is emitted if no RCU read
 * lock is held. Returns the matching entry or NULL.
 */
1371static struct rt6_exception *
1372__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1373 const struct in6_addr *daddr,
1374 const struct in6_addr *saddr)
1375{
1376 struct rt6_exception *rt6_ex;
1377 u32 hval;
1378
1379 WARN_ON_ONCE(!rcu_read_lock_held());
1380
1381 if (!(*bucket) || !daddr)
1382 return NULL;
1383
1384 hval = rt6_exception_hash(daddr, saddr);
1385 *bucket += hval;
1386
1387 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1388 struct rt6_info *rt6 = rt6_ex->rt6i;
1389 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1390
1391#ifdef CONFIG_IPV6_SUBTREES
1392 if (matched && saddr)
1393 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1394#endif
1395 if (matched)
1396 return rt6_ex;
1397 }
1398 return NULL;
1399}
1400
/* Return the effective MTU of the FIB entry @rt.
 *
 * Uses rt->fib6_pmtu when it is non-zero, otherwise the mtu6 setting of the
 * nexthop device's inet6_dev (read under rcu_read_lock()). The value is
 * capped at IP6_MAX_MTU and reduced by the lightweight-tunnel headroom of
 * the nexthop.
 */
8d1c802b 1401static unsigned int fib6_mtu(const struct fib6_info *rt)
d4ead6b3
DA
1402{
1403 unsigned int mtu;
1404
dcd1f572
DA
1405 if (rt->fib6_pmtu) {
1406 mtu = rt->fib6_pmtu;
1407 } else {
1408 struct net_device *dev = fib6_info_nh_dev(rt);
1409 struct inet6_dev *idev;
1410
1411 rcu_read_lock();
/* NOTE(review): idev is dereferenced without a NULL check - confirm
 * that __in6_dev_get() cannot return NULL for a nexthop device here.
 */
1412 idev = __in6_dev_get(dev);
1413 mtu = idev->cnf.mtu6;
1414 rcu_read_unlock();
1415 }
1416
d4ead6b3
DA
1417 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1418
1419 return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1420}
1421
/* Insert the cached route @nrt into the exception table of its parent FIB
 * entry @ort.
 *
 * Under rt6_exception_lock: refuses when the table has been flushed,
 * allocates the bucket array on first use, rejects @nrt when its raw MTU
 * metric is not below fib6_mtu(@ort), replaces any existing exception for
 * the same key, links the new entry at the chain head and evicts the oldest
 * entry once the bucket depth exceeds FIB6_MAX_DEPTH.
 *
 * On success the sernum of @ort is bumped under the table lock and GC is
 * kicked.
 *
 * Returns 0 on success, -EINVAL (flushed table or MTU check) or -ENOMEM.
 */
35732d01 1422static int rt6_insert_exception(struct rt6_info *nrt,
8d1c802b 1423 struct fib6_info *ort)
35732d01 1424{
5e670d84 1425 struct net *net = dev_net(nrt->dst.dev);
35732d01
WW
1426 struct rt6_exception_bucket *bucket;
1427 struct in6_addr *src_key = NULL;
1428 struct rt6_exception *rt6_ex;
1429 int err = 0;
1430
35732d01
WW
1431 spin_lock_bh(&rt6_exception_lock);
1432
1433 if (ort->exception_bucket_flushed) {
1434 err = -EINVAL;
1435 goto out;
1436 }
1437
1438 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1439 lockdep_is_held(&rt6_exception_lock));
1440 if (!bucket) {
/* GFP_ATOMIC: we are inside a BH-disabling spinlock section */
1441 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1442 GFP_ATOMIC);
1443 if (!bucket) {
1444 err = -ENOMEM;
1445 goto out;
1446 }
1447 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1448 }
1449
1450#ifdef CONFIG_IPV6_SUBTREES
1451 /* rt6i_src.plen != 0 indicates ort is in subtree
1452 * and exception table is indexed by a hash of
1453 * both rt6i_dst and rt6i_src.
1454 * Otherwise, the exception table is indexed by
1455 * a hash of only rt6i_dst.
1456 */
93c2fb25 1457 if (ort->fib6_src.plen)
35732d01
WW
1458 src_key = &nrt->rt6i_src.addr;
1459#endif
f5bbe7ee
WW
1460 /* rt6_mtu_change() might lower mtu on ort.
1461 * Only insert this exception route if its mtu
1462 * is less than ort's mtu value.
1463 */
d4ead6b3 1464 if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
f5bbe7ee
WW
1465 err = -EINVAL;
1466 goto out;
1467 }
60006a48 1468
/* the lookup also advances 'bucket' to the chain for this key */
35732d01
WW
1469 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1470 src_key);
1471 if (rt6_ex)
1472 rt6_remove_exception(bucket, rt6_ex);
1473
/* note: an allocation failure here leaves the key without any
 * exception, since the previous one was already removed above
 */
1474 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1475 if (!rt6_ex) {
1476 err = -ENOMEM;
1477 goto out;
1478 }
1479 rt6_ex->rt6i = nrt;
1480 rt6_ex->stamp = jiffies;
35732d01
WW
1481 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1482 bucket->depth++;
81eb8447 1483 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1484
1485 if (bucket->depth > FIB6_MAX_DEPTH)
1486 rt6_exception_remove_oldest(bucket);
1487
1488out:
1489 spin_unlock_bh(&rt6_exception_lock);
1490
1491 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1492 if (!err) {
93c2fb25 1493 spin_lock_bh(&ort->fib6_table->tb6_lock);
7aef6859 1494 fib6_update_sernum(net, ort);
93c2fb25 1495 spin_unlock_bh(&ort->fib6_table->tb6_lock);
b886d5f2
PA
1496 fib6_force_start_gc(net);
1497 }
35732d01
WW
1498
1499 return err;
1500}
1501
/* Remove every exception route hanging off the FIB entry @rt.
 *
 * Sets rt->exception_bucket_flushed first so that a later
 * rt6_insert_exception() refuses to repopulate the table, then empties all
 * FIB6_EXCEPTION_BUCKET_SIZE chains under rt6_exception_lock. The bucket
 * array itself is not freed here.
 */
8d1c802b 1502void rt6_flush_exceptions(struct fib6_info *rt)
35732d01
WW
1503{
1504 struct rt6_exception_bucket *bucket;
1505 struct rt6_exception *rt6_ex;
1506 struct hlist_node *tmp;
1507 int i;
1508
1509 spin_lock_bh(&rt6_exception_lock);
1510 /* Prevent rt6_insert_exception() to recreate the bucket list */
1511 rt->exception_bucket_flushed = 1;
1512
1513 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1514 lockdep_is_held(&rt6_exception_lock));
1515 if (!bucket)
1516 goto out;
1517
1518 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
/* _safe iterator: rt6_remove_exception() unlinks the current entry */
1519 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1520 rt6_remove_exception(bucket, rt6_ex);
1521 WARN_ON_ONCE(bucket->depth);
1522 bucket++;
1523 }
1524
1525out:
1526 spin_unlock_bh(&rt6_exception_lock);
1527}
1528
1529/* Find cached rt in the hash table inside passed in rt
1530 * Caller has to hold rcu_read_lock()
1531 */
/* Returns the exception route of @rt matching @daddr (and @saddr when @rt
 * has a source prefix under CONFIG_IPV6_SUBTREES), or NULL when there is no
 * match or the matching route is reported expired by rt6_check_expired().
 * No reference is taken on the returned route.
 */
8d1c802b 1532static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
1533 struct in6_addr *daddr,
1534 struct in6_addr *saddr)
1535{
1536 struct rt6_exception_bucket *bucket;
1537 struct in6_addr *src_key = NULL;
1538 struct rt6_exception *rt6_ex;
1539 struct rt6_info *res = NULL;
1540
1541 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1542
1543#ifdef CONFIG_IPV6_SUBTREES
1544 /* rt6i_src.plen != 0 indicates rt is in subtree
1545 * and exception table is indexed by a hash of
1546 * both rt6i_dst and rt6i_src.
1547 * Otherwise, the exception table is indexed by
1548 * a hash of only rt6i_dst.
1549 */
93c2fb25 1550 if (rt->fib6_src.plen)
35732d01
WW
1551 src_key = saddr;
1552#endif
1553 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1554
1555 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1556 res = rt6_ex->rt6i;
1557
1558 return res;
1559}
1560
1561/* Remove the passed in cached rt from the hash table that contains it */
/* Returns 0 when the exception was found and removed, -EINVAL when @rt has
 * no parent (rt->from is NULL) or is not an RTF_CACHE route, and -ENOENT
 * when the parent has no exception table or holds no entry for @rt's key.
 *
 * NOTE(review): rt->from is read with rcu_dereference() but this function
 * does not take rcu_read_lock() itself - presumably every caller holds it;
 * confirm against the call sites.
 */
23fb93a4 1562static int rt6_remove_exception_rt(struct rt6_info *rt)
35732d01 1563{
35732d01
WW
1564 struct rt6_exception_bucket *bucket;
1565 struct in6_addr *src_key = NULL;
1566 struct rt6_exception *rt6_ex;
8a14e46f 1567 struct fib6_info *from;
35732d01
WW
1568 int err;
1569
091311de 1570 from = rcu_dereference(rt->from);
35732d01 1571 if (!from ||
442d713b 1572 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1573 return -EINVAL;
1574
/* cheap unlocked peek to avoid taking the lock for an empty table */
1575 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1576 return -ENOENT;
1577
1578 spin_lock_bh(&rt6_exception_lock);
1579 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1580 lockdep_is_held(&rt6_exception_lock));
1581#ifdef CONFIG_IPV6_SUBTREES
1582 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1583 * and exception table is indexed by a hash of
1584 * both rt6i_dst and rt6i_src.
1585 * Otherwise, the exception table is indexed by
1586 * a hash of only rt6i_dst.
1587 */
93c2fb25 1588 if (from->fib6_src.plen)
35732d01
WW
1589 src_key = &rt->rt6i_src.addr;
1590#endif
1591 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1592 &rt->rt6i_dst.addr,
1593 src_key);
1594 if (rt6_ex) {
1595 rt6_remove_exception(bucket, rt6_ex);
1596 err = 0;
1597 } else {
1598 err = -ENOENT;
1599 }
1600
1601 spin_unlock_bh(&rt6_exception_lock);
1602 return err;
1603}
1604
1605/* Find rt6_ex which contains the passed in rt cache and
1606 * refresh its stamp
1607 */
/* The whole lookup, including the read of rt->from, runs inside
 * rcu_read_lock(). Nothing happens when @rt has no parent, is not an
 * RTF_CACHE route, or has no matching exception entry; otherwise the
 * entry's stamp is set to the current jiffies.
 */
1608static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1609{
35732d01
WW
1610 struct rt6_exception_bucket *bucket;
1611 struct in6_addr *src_key = NULL;
1612 struct rt6_exception *rt6_ex;
1f1cdcc0 1613 struct fib6_info *from;
35732d01
WW
1614
1615 rcu_read_lock();
1f1cdcc0
PA
1616 from = rcu_dereference(rt->from);
1617 if (!from || !(rt->rt6i_flags & RTF_CACHE))
1618 goto unlock;
1619
35732d01
WW
1620 bucket = rcu_dereference(from->rt6i_exception_bucket);
1621
1622#ifdef CONFIG_IPV6_SUBTREES
1623 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1624 * and exception table is indexed by a hash of
1625 * both rt6i_dst and rt6i_src.
1626 * Otherwise, the exception table is indexed by
1627 * a hash of only rt6i_dst.
1628 */
93c2fb25 1629 if (from->fib6_src.plen)
35732d01
WW
1630 src_key = &rt->rt6i_src.addr;
1631#endif
1632 rt6_ex = __rt6_find_exception_rcu(&bucket,
1633 &rt->rt6i_dst.addr,
1634 src_key);
1635 if (rt6_ex)
1636 rt6_ex->stamp = jiffies;
1637
1f1cdcc0 1638unlock:
35732d01
WW
1639 rcu_read_unlock();
1640}
1641
/* Decide whether the PMTU of the cached route @rt may be changed to @mtu
 * after a device MTU change on @idev. Returns true when the route's current
 * MTU is already >= @mtu (a decrease) or equals the device's configured
 * mtu6; false otherwise.
 */
e9fa1495
SB
1642static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1643 struct rt6_info *rt, int mtu)
1644{
1645 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1646 * lowest MTU in the path: always allow updating the route PMTU to
1647 * reflect PMTU decreases.
1648 *
1649 * If the new MTU is higher, and the route PMTU is equal to the local
1650 * MTU, this means the old MTU is the lowest in the path, so allow
1651 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1652 * handle this.
1653 */
1654
1655 if (dst_mtu(&rt->dst) >= mtu)
1656 return true;
1657
1658 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1659 return true;
1660
1661 return false;
1662}
1663
/* Apply a new device MTU @mtu to every exception route of the FIB entry
 * @rt that carries its own MTU metric and for which
 * rt6_mtu_change_route_allowed() agrees.
 *
 * The bucket pointer is read with rcu_dereference_protected() against
 * rt6_exception_lock and the lock is not taken here, so the caller is
 * expected to hold it.
 */
1664static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
8d1c802b 1665 struct fib6_info *rt, int mtu)
f5bbe7ee
WW
1666{
1667 struct rt6_exception_bucket *bucket;
1668 struct rt6_exception *rt6_ex;
1669 int i;
1670
1671 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1672 lockdep_is_held(&rt6_exception_lock));
1673
e9fa1495
SB
1674 if (!bucket)
1675 return;
1676
1677 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1678 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1679 struct rt6_info *entry = rt6_ex->rt6i;
1680
1681 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
d4ead6b3 1682 * route), the metrics of its rt->from have already
e9fa1495
SB
1683 * been updated.
1684 */
d4ead6b3 1685 if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
e9fa1495 1686 rt6_mtu_change_route_allowed(idev, entry, mtu))
d4ead6b3 1687 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
f5bbe7ee 1688 }
e9fa1495 1689 bucket++;
f5bbe7ee
WW
1690 }
1691}
1692
/* Flag combination identifying a cached route that goes through a gateway */
b16cb459
WW
1693#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1694
/* Remove every exception route of the FIB entry @rt that has both
 * RTF_GATEWAY and RTF_CACHE set and whose gateway address equals @gateway.
 * Returns early without locking when @rt has no exception table; otherwise
 * the scan runs under rt6_exception_lock.
 */
8d1c802b 1695static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
b16cb459
WW
1696 struct in6_addr *gateway)
1697{
1698 struct rt6_exception_bucket *bucket;
1699 struct rt6_exception *rt6_ex;
1700 struct hlist_node *tmp;
1701 int i;
1702
1703 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1704 return;
1705
1706 spin_lock_bh(&rt6_exception_lock);
1707 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1708 lockdep_is_held(&rt6_exception_lock));
1709
1710 if (bucket) {
1711 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1712 hlist_for_each_entry_safe(rt6_ex, tmp,
1713 &bucket->chain, hlist) {
1714 struct rt6_info *entry = rt6_ex->rt6i;
1715
1716 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1717 RTF_CACHE_GATEWAY &&
1718 ipv6_addr_equal(gateway,
1719 &entry->rt6i_gateway)) {
1720 rt6_remove_exception(bucket, rt6_ex);
1721 }
1722 }
1723 bucket++;
1724 }
1725 }
1726
1727 spin_unlock_bh(&rt6_exception_lock);
1728}
1729
/* Garbage-collection check for one exception entry.
 *
 * The entry is removed when:
 *  - it has no RTF_EXPIRES and was last used at least gc_args->timeout
 *    before @now, or
 *  - it has RTF_EXPIRES and dst.expires lies before the current jiffies
 *    (note: jiffies, not @now, is used for this comparison), or
 *  - it is an RTF_GATEWAY route whose gateway has no neighbour entry or a
 *    neighbour entry without NTF_ROUTER.
 * Entries that survive are counted in gc_args->more.
 *
 * Called from rt6_age_exceptions() with rt6_exception_lock held inside an
 * rcu_read_lock_bh() section.
 */
c757faa8
WW
1730static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1731 struct rt6_exception *rt6_ex,
1732 struct fib6_gc_args *gc_args,
1733 unsigned long now)
1734{
1735 struct rt6_info *rt = rt6_ex->rt6i;
1736
1859bac0
PA
1737 /* we are pruning and obsoleting aged-out and non gateway exceptions
1738 * even if others have still references to them, so that on next
1739 * dst_check() such references can be dropped.
1740 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1741 * expired, independently from their aging, as per RFC 8201 section 4
1742 */
31afeb42
WW
1743 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1744 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1745 RT6_TRACE("aging clone %p\n", rt);
1746 rt6_remove_exception(bucket, rt6_ex);
1747 return;
1748 }
1749 } else if (time_after(jiffies, rt->dst.expires)) {
1750 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1751 rt6_remove_exception(bucket, rt6_ex);
1752 return;
31afeb42
WW
1753 }
1754
1755 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1756 struct neighbour *neigh;
1757 __u8 neigh_flags = 0;
1758
/* no reference is taken on the neighbour; only its flags are read */
1bfa26ff
ED
1759 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1760 if (neigh)
c757faa8 1761 neigh_flags = neigh->flags;
1bfa26ff 1762
c757faa8
WW
1763 if (!(neigh_flags & NTF_ROUTER)) {
1764 RT6_TRACE("purging route %p via non-router but gateway\n",
1765 rt);
1766 rt6_remove_exception(bucket, rt6_ex);
1767 return;
1768 }
1769 }
31afeb42 1770
c757faa8
WW
1771 gc_args->more++;
1772}
1773
/* Run the GC check rt6_age_examine_exception() on every exception route of
 * the FIB entry @rt. Returns immediately when @rt has no exception table.
 *
 * The scan takes rcu_read_lock_bh() and then the plain spin_lock() on
 * rt6_exception_lock (rather than spin_lock_bh()).
 * NOTE(review): the RCU-bh section is presumably needed for the noref
 * neighbour lookup done by the per-entry check - confirm.
 */
8d1c802b 1774void rt6_age_exceptions(struct fib6_info *rt,
c757faa8
WW
1775 struct fib6_gc_args *gc_args,
1776 unsigned long now)
1777{
1778 struct rt6_exception_bucket *bucket;
1779 struct rt6_exception *rt6_ex;
1780 struct hlist_node *tmp;
1781 int i;
1782
1783 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1784 return;
1785
1bfa26ff
ED
1786 rcu_read_lock_bh();
1787 spin_lock(&rt6_exception_lock);
c757faa8
WW
1788 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1789 lockdep_is_held(&rt6_exception_lock));
1790
1791 if (bucket) {
1792 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
/* _safe iterator: the check may remove the current entry */
1793 hlist_for_each_entry_safe(rt6_ex, tmp,
1794 &bucket->chain, hlist) {
1795 rt6_age_examine_exception(bucket, rt6_ex,
1796 gc_args, now);
1797 }
1798 bucket++;
1799 }
1800 }
1bfa26ff
ED
1801 spin_unlock(&rt6_exception_lock);
1802 rcu_read_unlock_bh();
c757faa8
WW
1803}
1804
/* Select the best FIB entry for @fl6 in @table.
 *
 * Starts at the node found by fib6_node_lookup() and calls rt6_select();
 * while that yields the null entry it backtracks through the tree. If
 * backtracking is exhausted and RT6_LOOKUP_F_REACHABLE was requested, the
 * search restarts once from the original node without that flag.
 * FLOWI_FLAG_SKIP_NH_OIF forces @oif to 0.
 *
 * Returns the selected entry, which may be net->ipv6.fib6_null_entry.
 * No reference is taken here.
 */
1d053da9
DA
1805/* must be called with rcu lock held */
1806struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
1807 int oif, struct flowi6 *fl6, int strict)
1da177e4 1808{
367efcb9 1809 struct fib6_node *fn, *saved_fn;
8d1c802b 1810 struct fib6_info *f6i;
1da177e4 1811
6454743b 1812 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1813 saved_fn = fn;
1da177e4 1814
ca254490
DA
1815 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1816 oif = 0;
1817
a3c00e46 1818redo_rt6_select:
23fb93a4 1819 f6i = rt6_select(net, fn, oif, strict);
23fb93a4 1820 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1821 fn = fib6_backtrack(fn, &fl6->saddr);
1822 if (fn)
1823 goto redo_rt6_select;
367efcb9
MKL
1824 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1825 /* also consider unreachable route */
1826 strict &= ~RT6_LOOKUP_F_REACHABLE;
1827 fn = saved_fn;
1828 goto redo_rt6_select;
367efcb9 1829 }
a3c00e46
MKL
1830 }
1831
d4bea421 1832 trace_fib6_table_lookup(net, f6i, table, fl6);
fb9de91e 1833
1d053da9
DA
1834 return f6i;
1835}
1836
/* Resolve @fl6 in @table to a referenced rt6_info.
 *
 * The lookup is strict on interface / link state as requested in @flags,
 * and additionally prefers reachable routers when forwarding is disabled
 * for the namespace. After FIB selection (and multipath selection when the
 * entry has siblings) the result is one of:
 *  - net->ipv6.ip6_null_entry when nothing matched,
 *  - a cached exception route for the flow,
 *  - an uncached RTF_CACHE clone when FLOWI_FLAG_KNOWN_NH is set and the
 *    entry is not a gateway route,
 *  - this CPU's per-cpu copy of the entry (created on demand).
 * The function never returns NULL.
 */
1837struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1838 int oif, struct flowi6 *fl6,
1839 const struct sk_buff *skb, int flags)
1840{
1841 struct fib6_info *f6i;
1842 struct rt6_info *rt;
1843 int strict = 0;
1844
1845 strict |= flags & RT6_LOOKUP_F_IFACE;
1846 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1847 if (net->ipv6.devconf_all->forwarding == 0)
1848 strict |= RT6_LOOKUP_F_REACHABLE;
1849
1850 rcu_read_lock();
1851
1852 f6i = fib6_table_lookup(net, table, oif, fl6, strict);
1853 if (f6i->fib6_nsiblings)
1854 f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
1855
23fb93a4 1856 if (f6i == net->ipv6.fib6_null_entry) {
421842ed 1857 rt = net->ipv6.ip6_null_entry;
66f5d6ce 1858 rcu_read_unlock();
d3843fe5 1859 dst_hold(&rt->dst);
d3843fe5 1860 return rt;
23fb93a4
DA
1861 }
1862
1863 /*Search through exception table */
1864 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1865 if (rt) {
/* on a failed hold ip6_hold_safe() substitutes the null entry */
d4ead6b3 1866 if (ip6_hold_safe(net, &rt, true))
d3843fe5 1867 dst_use_noref(&rt->dst, jiffies);
d4ead6b3 1868
66f5d6ce 1869 rcu_read_unlock();
d52d3997 1870 return rt;
3da59bd9 1871 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
93c2fb25 1872 !(f6i->fib6_flags & RTF_GATEWAY))) {
3da59bd9
MKL
1873 /* Create a RTF_CACHE clone which will not be
1874 * owned by the fib6 tree. It is for the special case where
1875 * the daddr in the skb during the neighbor look-up is different
1876 * from the fl6->daddr used to look-up route here.
1877 */
3da59bd9
MKL
1878 struct rt6_info *uncached_rt;
1879
23fb93a4 1880 uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
d52d3997 1881
4d85cd0c 1882 rcu_read_unlock();
c71099ac 1883
1cfb71ee
WW
1884 if (uncached_rt) {
1885 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1886 * No need for another dst_hold()
1887 */
8d0b94af 1888 rt6_uncached_list_add(uncached_rt);
81eb8447 1889 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1890 } else {
3da59bd9 1891 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1892 dst_hold(&uncached_rt->dst);
1893 }
b811580d 1894
3da59bd9 1895 return uncached_rt;
d52d3997
MKL
1896 } else {
1897 /* Get a percpu copy */
1898
1899 struct rt6_info *pcpu_rt;
1900
/* BHs stay off between the slot read and the install so that the
 * cmpxchg() in rt6_make_pcpu_route() still finds the slot empty
 */
951f788a 1901 local_bh_disable();
23fb93a4 1902 pcpu_rt = rt6_get_pcpu_route(f6i);
d52d3997 1903
93531c67
DA
1904 if (!pcpu_rt)
1905 pcpu_rt = rt6_make_pcpu_route(net, f6i);
1906
951f788a
ED
1907 local_bh_enable();
1908 rcu_read_unlock();
d4bea421 1909
d52d3997
MKL
1910 return pcpu_rt;
1911 }
1da177e4 1912}
9ff74384 1913EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1914
/* Input-path per-table callback for fib6_rule_lookup(): calls
 * ip6_pol_route() with the flow's incoming interface (flowi6_iif) in the
 * interface argument.
 */
b75cc8f9
DA
1915static struct rt6_info *ip6_pol_route_input(struct net *net,
1916 struct fib6_table *table,
1917 struct flowi6 *fl6,
1918 const struct sk_buff *skb,
1919 int flags)
4acad72d 1920{
b75cc8f9 1921 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
4acad72d
PE
1922}
1923
/* Route lookup for a packet received on @dev.
 *
 * Adds RT6_LOOKUP_F_IFACE when rt6_need_strict() says the destination
 * address requires it and @dev is not of type ARPHRD_PIMREG, then runs the
 * policy-rule lookup with ip6_pol_route_input() as the per-table callback
 * and returns its result.
 */
d409b847
MB
1924struct dst_entry *ip6_route_input_lookup(struct net *net,
1925 struct net_device *dev,
b75cc8f9
DA
1926 struct flowi6 *fl6,
1927 const struct sk_buff *skb,
1928 int flags)
72331bc0
SL
1929{
1930 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1931 flags |= RT6_LOOKUP_F_IFACE;
1932
b75cc8f9 1933 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
72331bc0 1934}
d409b847 1935EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1936
/* Fill the L3 multipath hash keys (source, destination, flow label and
 * protocol) in @keys for @skb.
 *
 * For ICMPv6 destination-unreachable, packet-too-big, time-exceeded and
 * parameter-problem messages whose embedded IPv6 header can be read, the
 * keys come from that inner header and any pre-dissected @flkeys are
 * ignored. In every other case the keys come from @flkeys when provided,
 * otherwise from the outer IPv6 header.
 */
23aebdac 1937static void ip6_multipath_l3_keys(const struct sk_buff *skb,
5e5d6fed
RP
1938 struct flow_keys *keys,
1939 struct flow_keys *flkeys)
23aebdac
JS
1940{
1941 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1942 const struct ipv6hdr *key_iph = outer_iph;
5e5d6fed 1943 struct flow_keys *_flkeys = flkeys;
23aebdac
JS
1944 const struct ipv6hdr *inner_iph;
1945 const struct icmp6hdr *icmph;
1946 struct ipv6hdr _inner_iph;
cea67a2d 1947 struct icmp6hdr _icmph;
23aebdac
JS
1948
1949 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1950 goto out;
1951
/* skb_header_pointer() copies into the stack buffer if needed and
 * returns NULL when the header cannot be read
 */
cea67a2d
ED
1952 icmph = skb_header_pointer(skb, skb_transport_offset(skb),
1953 sizeof(_icmph), &_icmph);
1954 if (!icmph)
1955 goto out;
1956
23aebdac
JS
1957 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1958 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1959 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1960 icmph->icmp6_type != ICMPV6_PARAMPROB)
1961 goto out;
1962
1963 inner_iph = skb_header_pointer(skb,
1964 skb_transport_offset(skb) + sizeof(*icmph),
1965 sizeof(_inner_iph), &_inner_iph);
1966 if (!inner_iph)
1967 goto out;
1968
/* hash on the offending packet's header, not on pre-dissected keys */
1969 key_iph = inner_iph;
5e5d6fed 1970 _flkeys = NULL;
23aebdac 1971out:
5e5d6fed
RP
1972 if (_flkeys) {
1973 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
1974 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
1975 keys->tags.flow_label = _flkeys->tags.flow_label;
1976 keys->basic.ip_proto = _flkeys->basic.ip_proto;
1977 } else {
1978 keys->addrs.v6addrs.src = key_iph->saddr;
1979 keys->addrs.v6addrs.dst = key_iph->daddr;
fa1be7e0 1980 keys->tags.flow_label = ip6_flowlabel(key_iph);
5e5d6fed
RP
1981 keys->basic.ip_proto = key_iph->nexthdr;
1982 }
23aebdac
JS
1983}
1984
1985/* if skb is set it will be used and fl6 can be NULL */
/* Compute the multipath hash for a flow, according to the namespace's
 * ip6_multipath_hash_policy():
 *  - policy 0: hash over addresses, flow label and protocol, taken from
 *    @skb via ip6_multipath_l3_keys() or else from @fl6;
 *  - policy 1: hash over addresses, ports and protocol, taken from
 *    @flkeys / a fresh dissection of @skb or else from @fl6; when the skb
 *    already carries an L4 hash that hash is reused directly.
 * The returned value is the 32-bit hash shifted right by one bit.
 *
 * NOTE(review): the switch has no default case, so hash_keys would be used
 * uninitialised for any policy value other than 0 or 1 - confirm that the
 * policy is restricted to those two values where it is configured.
 */
b4bac172
DA
1986u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1987 const struct sk_buff *skb, struct flow_keys *flkeys)
23aebdac
JS
1988{
1989 struct flow_keys hash_keys;
9a2a537a 1990 u32 mhash;
23aebdac 1991
bbfa047a 1992 switch (ip6_multipath_hash_policy(net)) {
b4bac172
DA
1993 case 0:
1994 memset(&hash_keys, 0, sizeof(hash_keys));
1995 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1996 if (skb) {
1997 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
1998 } else {
1999 hash_keys.addrs.v6addrs.src = fl6->saddr;
2000 hash_keys.addrs.v6addrs.dst = fl6->daddr;
fa1be7e0 2001 hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
b4bac172
DA
2002 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2003 }
2004 break;
2005 case 1:
2006 if (skb) {
2007 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2008 struct flow_keys keys;
2009
2010 /* short-circuit if we already have L4 hash present */
2011 if (skb->l4_hash)
2012 return skb_get_hash_raw(skb) >> 1;
2013
2014 memset(&hash_keys, 0, sizeof(hash_keys));
2015
/* dissect only when the caller did not hand in flow keys */
2016 if (!flkeys) {
2017 skb_flow_dissect_flow_keys(skb, &keys, flag);
2018 flkeys = &keys;
2019 }
2020 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2021 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2022 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2023 hash_keys.ports.src = flkeys->ports.src;
2024 hash_keys.ports.dst = flkeys->ports.dst;
2025 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2026 } else {
2027 memset(&hash_keys, 0, sizeof(hash_keys));
2028 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2029 hash_keys.addrs.v6addrs.src = fl6->saddr;
2030 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2031 hash_keys.ports.src = fl6->fl6_sport;
2032 hash_keys.ports.dst = fl6->fl6_dport;
2033 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2034 }
2035 break;
23aebdac 2036 }
9a2a537a 2037 mhash = flow_hash_from_keys(&hash_keys);
23aebdac 2038
9a2a537a 2039 return mhash >> 1;
23aebdac
JS
2040}
2041
c71099ac
TG
2042void ip6_route_input(struct sk_buff *skb)
2043{
b71d1d42 2044 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 2045 struct net *net = dev_net(skb->dev);
adaa70bb 2046 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 2047 struct ip_tunnel_info *tun_info;
4c9483b2 2048 struct flowi6 fl6 = {
e0d56fdd 2049 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
2050 .daddr = iph->daddr,
2051 .saddr = iph->saddr,
6502ca52 2052 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
2053 .flowi6_mark = skb->mark,
2054 .flowi6_proto = iph->nexthdr,
c71099ac 2055 };
5e5d6fed 2056 struct flow_keys *flkeys = NULL, _flkeys;
adaa70bb 2057
904af04d 2058 tun_info = skb_tunnel_info(skb);
46fa062a 2059 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 2060 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
5e5d6fed
RP
2061
2062 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2063 flkeys = &_flkeys;
2064
23aebdac 2065 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
b4bac172 2066 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
06e9d040 2067 skb_dst_drop(skb);
b75cc8f9
DA
2068 skb_dst_set(skb,
2069 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
c71099ac
TG
2070}
2071
b75cc8f9
DA
2072static struct rt6_info *ip6_pol_route_output(struct net *net,
2073 struct fib6_table *table,
2074 struct flowi6 *fl6,
2075 const struct sk_buff *skb,
2076 int flags)
1da177e4 2077{
b75cc8f9 2078 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
c71099ac
TG
2079}
2080
6f21c96a
PA
2081struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2082 struct flowi6 *fl6, int flags)
c71099ac 2083{
d46a9d67 2084 bool any_src;
c71099ac 2085
3ede0bbc
RS
2086 if (ipv6_addr_type(&fl6->daddr) &
2087 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
4c1feac5
DA
2088 struct dst_entry *dst;
2089
2090 dst = l3mdev_link_scope_lookup(net, fl6);
2091 if (dst)
2092 return dst;
2093 }
ca254490 2094
1fb9489b 2095 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 2096
d46a9d67 2097 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 2098 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 2099 (fl6->flowi6_oif && any_src))
77d16f45 2100 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 2101
d46a9d67 2102 if (!any_src)
adaa70bb 2103 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
2104 else if (sk)
2105 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 2106
b75cc8f9 2107 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
1da177e4 2108}
6f21c96a 2109EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 2110
2774c131 2111struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2112{
5c1e6aa3 2113 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 2114 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
2115 struct dst_entry *new = NULL;
2116
1dbe3252 2117 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 2118 DST_OBSOLETE_DEAD, 0);
14e50e57 2119 if (rt) {
0a1f5962 2120 rt6_info_init(rt);
81eb8447 2121 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 2122
0a1f5962 2123 new = &rt->dst;
14e50e57 2124 new->__use = 1;
352e512c 2125 new->input = dst_discard;
ede2059d 2126 new->output = dst_discard_out;
14e50e57 2127
0a1f5962 2128 dst_copy_metrics(new, &ort->dst);
14e50e57 2129
1dbe3252 2130 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 2131 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 2132 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
2133
2134 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2135#ifdef CONFIG_IPV6_SUBTREES
2136 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2137#endif
14e50e57
DM
2138 }
2139
69ead7af
DM
2140 dst_release(dst_orig);
2141 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 2142}
14e50e57 2143
1da177e4
LT
2144/*
2145 * Destination cache support functions
2146 */
2147
8d1c802b 2148static bool fib6_check(struct fib6_info *f6i, u32 cookie)
4b32b5ad 2149{
93531c67
DA
2150 u32 rt_cookie = 0;
2151
8ae86971 2152 if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
93531c67
DA
2153 return false;
2154
2155 if (fib6_check_expired(f6i))
2156 return false;
2157
2158 return true;
4b32b5ad
MKL
2159}
2160
a68886a6
DA
2161static struct dst_entry *rt6_check(struct rt6_info *rt,
2162 struct fib6_info *from,
2163 u32 cookie)
3da59bd9 2164{
36143645 2165 u32 rt_cookie = 0;
c5cff856 2166
a68886a6 2167 if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
93531c67 2168 rt_cookie != cookie)
3da59bd9
MKL
2169 return NULL;
2170
2171 if (rt6_check_expired(rt))
2172 return NULL;
2173
2174 return &rt->dst;
2175}
2176
a68886a6
DA
2177static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2178 struct fib6_info *from,
2179 u32 cookie)
3da59bd9 2180{
5973fb1e
MKL
2181 if (!__rt6_check_expired(rt) &&
2182 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
a68886a6 2183 fib6_check(from, cookie))
3da59bd9
MKL
2184 return &rt->dst;
2185 else
2186 return NULL;
2187}
2188
1da177e4
LT
2189static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2190{
a87b7dc9 2191 struct dst_entry *dst_ret;
a68886a6 2192 struct fib6_info *from;
1da177e4
LT
2193 struct rt6_info *rt;
2194
a87b7dc9
DA
2195 rt = container_of(dst, struct rt6_info, dst);
2196
2197 rcu_read_lock();
1da177e4 2198
6f3118b5
ND
2199 /* All IPV6 dsts are created with ->obsolete set to the value
2200 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2201 * into this function always.
2202 */
e3bc10bd 2203
a68886a6 2204 from = rcu_dereference(rt->from);
4b32b5ad 2205
a68886a6
DA
2206 if (from && (rt->rt6i_flags & RTF_PCPU ||
2207 unlikely(!list_empty(&rt->rt6i_uncached))))
2208 dst_ret = rt6_dst_from_check(rt, from, cookie);
3da59bd9 2209 else
a68886a6 2210 dst_ret = rt6_check(rt, from, cookie);
a87b7dc9
DA
2211
2212 rcu_read_unlock();
2213
2214 return dst_ret;
1da177e4
LT
2215}
2216
2217static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2218{
2219 struct rt6_info *rt = (struct rt6_info *) dst;
2220
2221 if (rt) {
54c1a859 2222 if (rt->rt6i_flags & RTF_CACHE) {
c3c14da0 2223 rcu_read_lock();
54c1a859 2224 if (rt6_check_expired(rt)) {
93531c67 2225 rt6_remove_exception_rt(rt);
54c1a859
YH
2226 dst = NULL;
2227 }
c3c14da0 2228 rcu_read_unlock();
54c1a859 2229 } else {
1da177e4 2230 dst_release(dst);
54c1a859
YH
2231 dst = NULL;
2232 }
1da177e4 2233 }
54c1a859 2234 return dst;
1da177e4
LT
2235}
2236
2237static void ip6_link_failure(struct sk_buff *skb)
2238{
2239 struct rt6_info *rt;
2240
3ffe533c 2241 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2242
adf30907 2243 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2244 if (rt) {
8a14e46f 2245 rcu_read_lock();
1eb4f758 2246 if (rt->rt6i_flags & RTF_CACHE) {
761f6026 2247 rt6_remove_exception_rt(rt);
c5cff856 2248 } else {
a68886a6 2249 struct fib6_info *from;
c5cff856
WW
2250 struct fib6_node *fn;
2251
a68886a6
DA
2252 from = rcu_dereference(rt->from);
2253 if (from) {
2254 fn = rcu_dereference(from->fib6_node);
2255 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2256 fn->fn_sernum = -1;
2257 }
1eb4f758 2258 }
8a14e46f 2259 rcu_read_unlock();
1da177e4
LT
2260 }
2261}
2262
6a3e030f
DA
2263static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2264{
a68886a6
DA
2265 if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2266 struct fib6_info *from;
2267
2268 rcu_read_lock();
2269 from = rcu_dereference(rt0->from);
2270 if (from)
2271 rt0->dst.expires = from->expires;
2272 rcu_read_unlock();
2273 }
6a3e030f
DA
2274
2275 dst_set_expires(&rt0->dst, timeout);
2276 rt0->rt6i_flags |= RTF_EXPIRES;
2277}
2278
45e4fd26
MKL
2279static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2280{
2281 struct net *net = dev_net(rt->dst.dev);
2282
d4ead6b3 2283 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
45e4fd26 2284 rt->rt6i_flags |= RTF_MODIFIED;
45e4fd26
MKL
2285 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2286}
2287
0d3f6d29
MKL
2288static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2289{
a68886a6
DA
2290 bool from_set;
2291
2292 rcu_read_lock();
2293 from_set = !!rcu_dereference(rt->from);
2294 rcu_read_unlock();
2295
0d3f6d29 2296 return !(rt->rt6i_flags & RTF_CACHE) &&
a68886a6 2297 (rt->rt6i_flags & RTF_PCPU || from_set);
0d3f6d29
MKL
2298}
2299
45e4fd26
MKL
2300static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2301 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2302{
0dec879f 2303 const struct in6_addr *daddr, *saddr;
67ba4152 2304 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2305
19bda36c
XL
2306 if (dst_metric_locked(dst, RTAX_MTU))
2307 return;
2308
0dec879f
JA
2309 if (iph) {
2310 daddr = &iph->daddr;
2311 saddr = &iph->saddr;
2312 } else if (sk) {
2313 daddr = &sk->sk_v6_daddr;
2314 saddr = &inet6_sk(sk)->saddr;
2315 } else {
2316 daddr = NULL;
2317 saddr = NULL;
2318 }
2319 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2320 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2321 if (mtu >= dst_mtu(dst))
2322 return;
9d289715 2323
0d3f6d29 2324 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2325 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2326 /* update rt6_ex->stamp for cache */
2327 if (rt6->rt6i_flags & RTF_CACHE)
2328 rt6_update_exception_stamp_rt(rt6);
0dec879f 2329 } else if (daddr) {
a68886a6 2330 struct fib6_info *from;
45e4fd26
MKL
2331 struct rt6_info *nrt6;
2332
4d85cd0c 2333 rcu_read_lock();
a68886a6
DA
2334 from = rcu_dereference(rt6->from);
2335 nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
45e4fd26
MKL
2336 if (nrt6) {
2337 rt6_do_update_pmtu(nrt6, mtu);
a68886a6 2338 if (rt6_insert_exception(nrt6, from))
2b760fcf 2339 dst_release_immediate(&nrt6->dst);
45e4fd26 2340 }
a68886a6 2341 rcu_read_unlock();
1da177e4
LT
2342 }
2343}
2344
45e4fd26
MKL
2345static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2346 struct sk_buff *skb, u32 mtu)
2347{
2348 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2349}
2350
42ae66c8 2351void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2352 int oif, u32 mark, kuid_t uid)
81aded24
DM
2353{
2354 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2355 struct dst_entry *dst;
dc92095d
2356 struct flowi6 fl6 = {
2357 .flowi6_oif = oif,
2358 .flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
2359 .daddr = iph->daddr,
2360 .saddr = iph->saddr,
2361 .flowlabel = ip6_flowinfo(iph),
2362 .flowi6_uid = uid,
2363 };
81aded24
DM
2364
2365 dst = ip6_route_output(net, NULL, &fl6);
2366 if (!dst->error)
45e4fd26 2367 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2368 dst_release(dst);
2369}
2370EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2371
2372void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2373{
7ddacfa5 2374 int oif = sk->sk_bound_dev_if;
33c162a9
MKL
2375 struct dst_entry *dst;
2376
7ddacfa5
DA
2377 if (!oif && skb->dev)
2378 oif = l3mdev_master_ifindex(skb->dev);
2379
2380 ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2381
2382 dst = __sk_dst_get(sk);
2383 if (!dst || !dst->obsolete ||
2384 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2385 return;
2386
2387 bh_lock_sock(sk);
2388 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2389 ip6_datagram_dst_update(sk, false);
2390 bh_unlock_sock(sk);
81aded24
DM
2391}
2392EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2393
7d6850f7
AK
2394void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2395 const struct flowi6 *fl6)
2396{
2397#ifdef CONFIG_IPV6_SUBTREES
2398 struct ipv6_pinfo *np = inet6_sk(sk);
2399#endif
2400
2401 ip6_dst_store(sk, dst,
2402 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2403 &sk->sk_v6_daddr : NULL,
2404#ifdef CONFIG_IPV6_SUBTREES
2405 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2406 &np->saddr :
2407#endif
2408 NULL);
2409}
2410
b55b76b2
DJ
2411/* Handle redirects */
2412struct ip6rd_flowi {
2413 struct flowi6 fl6;
2414 struct in6_addr gateway;
2415};
2416
2417static struct rt6_info *__ip6_route_redirect(struct net *net,
2418 struct fib6_table *table,
2419 struct flowi6 *fl6,
b75cc8f9 2420 const struct sk_buff *skb,
b55b76b2
DJ
2421 int flags)
2422{
2423 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
23fb93a4 2424 struct rt6_info *ret = NULL, *rt_cache;
8d1c802b 2425 struct fib6_info *rt;
b55b76b2
DJ
2426 struct fib6_node *fn;
2427
2428 /* Get the "current" route for this destination and
67c408cf 2429 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2430 *
2431 * RFC 4861 specifies that redirects should only be
2432 * accepted if they come from the nexthop to the target.
2433 * Due to the way the routes are chosen, this notion
2434 * is a bit fuzzy and one might need to check all possible
2435 * routes.
2436 */
2437
66f5d6ce 2438 rcu_read_lock();
6454743b 2439 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
b55b76b2 2440restart:
66f5d6ce 2441 for_each_fib6_node_rt_rcu(fn) {
5e670d84 2442 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c 2443 continue;
14895687 2444 if (fib6_check_expired(rt))
b55b76b2 2445 continue;
93c2fb25 2446 if (rt->fib6_flags & RTF_REJECT)
b55b76b2 2447 break;
93c2fb25 2448 if (!(rt->fib6_flags & RTF_GATEWAY))
b55b76b2 2449 continue;
5e670d84 2450 if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
b55b76b2 2451 continue;
2b760fcf
WW
2452 /* rt_cache's gateway might be different from its 'parent'
2453 * in the case of an ip redirect.
2454 * So we keep searching in the exception table if the gateway
2455 * is different.
2456 */
5e670d84 2457 if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
2b760fcf
WW
2458 rt_cache = rt6_find_cached_rt(rt,
2459 &fl6->daddr,
2460 &fl6->saddr);
2461 if (rt_cache &&
2462 ipv6_addr_equal(&rdfl->gateway,
2463 &rt_cache->rt6i_gateway)) {
23fb93a4 2464 ret = rt_cache;
2b760fcf
WW
2465 break;
2466 }
b55b76b2 2467 continue;
2b760fcf 2468 }
b55b76b2
DJ
2469 break;
2470 }
2471
2472 if (!rt)
421842ed 2473 rt = net->ipv6.fib6_null_entry;
93c2fb25 2474 else if (rt->fib6_flags & RTF_REJECT) {
23fb93a4 2475 ret = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2476 goto out;
2477 }
2478
421842ed 2479 if (rt == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
2480 fn = fib6_backtrack(fn, &fl6->saddr);
2481 if (fn)
2482 goto restart;
b55b76b2 2483 }
a3c00e46 2484
b0a1ba59 2485out:
23fb93a4 2486 if (ret)
e873e4b9 2487 ip6_hold_safe(net, &ret, true);
23fb93a4
DA
2488 else
2489 ret = ip6_create_rt_rcu(rt);
b55b76b2 2490
66f5d6ce 2491 rcu_read_unlock();
b55b76b2 2492
b65f164d 2493 trace_fib6_table_lookup(net, rt, table, fl6);
23fb93a4 2494 return ret;
b55b76b2
DJ
2495};
2496
2497static struct dst_entry *ip6_route_redirect(struct net *net,
b75cc8f9
DA
2498 const struct flowi6 *fl6,
2499 const struct sk_buff *skb,
2500 const struct in6_addr *gateway)
b55b76b2
DJ
2501{
2502 int flags = RT6_LOOKUP_F_HAS_SADDR;
2503 struct ip6rd_flowi rdfl;
2504
2505 rdfl.fl6 = *fl6;
2506 rdfl.gateway = *gateway;
2507
b75cc8f9 2508 return fib6_rule_lookup(net, &rdfl.fl6, skb,
b55b76b2
DJ
2509 flags, __ip6_route_redirect);
2510}
2511
e2d118a1
LC
2512void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2513 kuid_t uid)
3a5ad2ee
DM
2514{
2515 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2516 struct dst_entry *dst;
1f7f10ac
2517 struct flowi6 fl6 = {
2518 .flowi6_iif = LOOPBACK_IFINDEX,
2519 .flowi6_oif = oif,
2520 .flowi6_mark = mark,
2521 .daddr = iph->daddr,
2522 .saddr = iph->saddr,
2523 .flowlabel = ip6_flowinfo(iph),
2524 .flowi6_uid = uid,
2525 };
3a5ad2ee 2526
b75cc8f9 2527 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
b55b76b2 2528 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2529 dst_release(dst);
2530}
2531EXPORT_SYMBOL_GPL(ip6_redirect);
2532
d456336d 2533void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
c92a59ec
DJ
2534{
2535 const struct ipv6hdr *iph = ipv6_hdr(skb);
2536 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2537 struct dst_entry *dst;
0b26fb17
2538 struct flowi6 fl6 = {
2539 .flowi6_iif = LOOPBACK_IFINDEX,
2540 .flowi6_oif = oif,
0b26fb17
2541 .daddr = msg->dest,
2542 .saddr = iph->daddr,
2543 .flowi6_uid = sock_net_uid(net, NULL),
2544 };
c92a59ec 2545
b75cc8f9 2546 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
b55b76b2 2547 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2548 dst_release(dst);
2549}
2550
3a5ad2ee
DM
2551void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2552{
e2d118a1
LC
2553 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2554 sk->sk_uid);
3a5ad2ee
DM
2555}
2556EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2557
0dbaee3b 2558static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2559{
0dbaee3b
DM
2560 struct net_device *dev = dst->dev;
2561 unsigned int mtu = dst_mtu(dst);
2562 struct net *net = dev_net(dev);
2563
1da177e4
LT
2564 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2565
5578689a
DL
2566 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2567 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2568
2569 /*
1ab1457c
YH
2570 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2571 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2572 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2573 * rely only on pmtu discovery"
2574 */
2575 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2576 mtu = IPV6_MAXPLEN;
2577 return mtu;
2578}
2579
ebb762f2 2580static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2581{
d33e4553 2582 struct inet6_dev *idev;
d4ead6b3 2583 unsigned int mtu;
4b32b5ad
MKL
2584
2585 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2586 if (mtu)
30f78d8e 2587 goto out;
618f9bc7
SK
2588
2589 mtu = IPV6_MIN_MTU;
d33e4553
DM
2590
2591 rcu_read_lock();
2592 idev = __in6_dev_get(dst->dev);
2593 if (idev)
2594 mtu = idev->cnf.mtu6;
2595 rcu_read_unlock();
2596
30f78d8e 2597out:
14972cbd
RP
2598 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2599
2600 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2601}
2602
901731b8
DA
2603/* MTU selection:
2604 * 1. mtu on route is locked - use it
2605 * 2. mtu from nexthop exception
2606 * 3. mtu from egress device
2607 *
2608 * based on ip6_dst_mtu_forward and exception logic of
2609 * rt6_find_cached_rt; called with rcu_read_lock
2610 */
2611u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
2612 struct in6_addr *saddr)
2613{
2614 struct rt6_exception_bucket *bucket;
2615 struct rt6_exception *rt6_ex;
2616 struct in6_addr *src_key;
2617 struct inet6_dev *idev;
2618 u32 mtu = 0;
2619
2620 if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
2621 mtu = f6i->fib6_pmtu;
2622 if (mtu)
2623 goto out;
2624 }
2625
2626 src_key = NULL;
2627#ifdef CONFIG_IPV6_SUBTREES
2628 if (f6i->fib6_src.plen)
2629 src_key = saddr;
2630#endif
2631
2632 bucket = rcu_dereference(f6i->rt6i_exception_bucket);
2633 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
2634 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
2635 mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
2636
2637 if (likely(!mtu)) {
2638 struct net_device *dev = fib6_info_nh_dev(f6i);
2639
2640 mtu = IPV6_MIN_MTU;
2641 idev = __in6_dev_get(dev);
2642 if (idev && idev->cnf.mtu6 > mtu)
2643 mtu = idev->cnf.mtu6;
2644 }
2645
2646 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2647out:
2648 return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
2649}
2650
3b00944c 2651struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2652 struct flowi6 *fl6)
1da177e4 2653{
87a11578 2654 struct dst_entry *dst;
1da177e4
LT
2655 struct rt6_info *rt;
2656 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2657 struct net *net = dev_net(dev);
1da177e4 2658
38308473 2659 if (unlikely(!idev))
122bdf67 2660 return ERR_PTR(-ENODEV);
1da177e4 2661
ad706862 2662 rt = ip6_dst_alloc(net, dev, 0);
38308473 2663 if (unlikely(!rt)) {
1da177e4 2664 in6_dev_put(idev);
87a11578 2665 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2666 goto out;
2667 }
2668
8e2ec639 2669 rt->dst.flags |= DST_HOST;
588753f1 2670 rt->dst.input = ip6_input;
8e2ec639 2671 rt->dst.output = ip6_output;
550bab42 2672 rt->rt6i_gateway = fl6->daddr;
87a11578 2673 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2674 rt->rt6i_dst.plen = 128;
2675 rt->rt6i_idev = idev;
14edd87d 2676 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2677
4c981e28 2678 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2679 * do proper release of the net_device
2680 */
2681 rt6_uncached_list_add(rt);
81eb8447 2682 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2683
87a11578
DM
2684 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2685
1da177e4 2686out:
87a11578 2687 return dst;
1da177e4
LT
2688}
2689
569d3645 2690static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2691{
86393e52 2692 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2693 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2694 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2695 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2696 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2697 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2698 int entries;
7019b78e 2699
fc66f95c 2700 entries = dst_entries_get_fast(ops);
49a18d86 2701 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2702 entries <= rt_max_size)
1da177e4
LT
2703 goto out;
2704
6891a346 2705 net->ipv6.ip6_rt_gc_expire++;
14956643 2706 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2707 entries = dst_entries_get_slow(ops);
2708 if (entries < ops->gc_thresh)
7019b78e 2709 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2710out:
7019b78e 2711 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2712 return entries > rt_max_size;
1da177e4
LT
2713}
2714
8c14586f
DA
2715static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2716 struct fib6_config *cfg,
f4797b33
DA
2717 const struct in6_addr *gw_addr,
2718 u32 tbid, int flags)
8c14586f
DA
2719{
2720 struct flowi6 fl6 = {
2721 .flowi6_oif = cfg->fc_ifindex,
2722 .daddr = *gw_addr,
2723 .saddr = cfg->fc_prefsrc,
2724 };
2725 struct fib6_table *table;
2726 struct rt6_info *rt;
8c14586f 2727
f4797b33 2728 table = fib6_get_table(net, tbid);
8c14586f
DA
2729 if (!table)
2730 return NULL;
2731
2732 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2733 flags |= RT6_LOOKUP_F_HAS_SADDR;
2734
f4797b33 2735 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
b75cc8f9 2736 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
8c14586f
DA
2737
2738 /* if table lookup failed, fall back to full lookup */
2739 if (rt == net->ipv6.ip6_null_entry) {
2740 ip6_rt_put(rt);
2741 rt = NULL;
2742 }
2743
2744 return rt;
2745}
2746
fc1e64e1
DA
2747static int ip6_route_check_nh_onlink(struct net *net,
2748 struct fib6_config *cfg,
9fbb704c 2749 const struct net_device *dev,
fc1e64e1
DA
2750 struct netlink_ext_ack *extack)
2751{
44750f84 2752 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
fc1e64e1
DA
2753 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2754 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2755 struct rt6_info *grt;
2756 int err;
2757
2758 err = 0;
2759 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2760 if (grt) {
58e354c0 2761 if (!grt->dst.error &&
4ed591c8
DA
2762 /* ignore match if it is the default route */
2763 grt->from && !ipv6_addr_any(&grt->from->fib6_dst.addr) &&
58e354c0 2764 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
44750f84
DA
2765 NL_SET_ERR_MSG(extack,
2766 "Nexthop has invalid gateway or device mismatch");
fc1e64e1
DA
2767 err = -EINVAL;
2768 }
2769
2770 ip6_rt_put(grt);
2771 }
2772
2773 return err;
2774}
2775
1edce99f
DA
2776static int ip6_route_check_nh(struct net *net,
2777 struct fib6_config *cfg,
2778 struct net_device **_dev,
2779 struct inet6_dev **idev)
2780{
2781 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2782 struct net_device *dev = _dev ? *_dev : NULL;
2783 struct rt6_info *grt = NULL;
2784 int err = -EHOSTUNREACH;
2785
2786 if (cfg->fc_table) {
f4797b33
DA
2787 int flags = RT6_LOOKUP_F_IFACE;
2788
2789 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2790 cfg->fc_table, flags);
1edce99f
DA
2791 if (grt) {
2792 if (grt->rt6i_flags & RTF_GATEWAY ||
2793 (dev && dev != grt->dst.dev)) {
2794 ip6_rt_put(grt);
2795 grt = NULL;
2796 }
2797 }
2798 }
2799
2800 if (!grt)
b75cc8f9 2801 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
1edce99f
DA
2802
2803 if (!grt)
2804 goto out;
2805
2806 if (dev) {
2807 if (dev != grt->dst.dev) {
2808 ip6_rt_put(grt);
2809 goto out;
2810 }
2811 } else {
2812 *_dev = dev = grt->dst.dev;
2813 *idev = grt->rt6i_idev;
2814 dev_hold(dev);
2815 in6_dev_hold(grt->rt6i_idev);
2816 }
2817
2818 if (!(grt->rt6i_flags & RTF_GATEWAY))
2819 err = 0;
2820
2821 ip6_rt_put(grt);
2822
2823out:
2824 return err;
2825}
2826
9fbb704c
DA
2827static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2828 struct net_device **_dev, struct inet6_dev **idev,
2829 struct netlink_ext_ack *extack)
2830{
2831 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2832 int gwa_type = ipv6_addr_type(gw_addr);
232378e8 2833 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
9fbb704c 2834 const struct net_device *dev = *_dev;
232378e8 2835 bool need_addr_check = !dev;
9fbb704c
DA
2836 int err = -EINVAL;
2837
2838 /* if gw_addr is local we will fail to detect this in case
2839 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2840 * will return already-added prefix route via interface that
2841 * prefix route was assigned to, which might be non-loopback.
2842 */
232378e8
DA
2843 if (dev &&
2844 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2845 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
9fbb704c
DA
2846 goto out;
2847 }
2848
2849 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2850 /* IPv6 strictly inhibits using not link-local
2851 * addresses as nexthop address.
2852 * Otherwise, router will not able to send redirects.
2853 * It is very good, but in some (rare!) circumstances
2854 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2855 * some exceptions. --ANK
2856 * We allow IPv4-mapped nexthops to support RFC4798-type
2857 * addressing
2858 */
2859 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2860 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2861 goto out;
2862 }
2863
2864 if (cfg->fc_flags & RTNH_F_ONLINK)
2865 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2866 else
2867 err = ip6_route_check_nh(net, cfg, _dev, idev);
2868
2869 if (err)
2870 goto out;
2871 }
2872
2873 /* reload in case device was changed */
2874 dev = *_dev;
2875
2876 err = -EINVAL;
2877 if (!dev) {
2878 NL_SET_ERR_MSG(extack, "Egress device not specified");
2879 goto out;
2880 } else if (dev->flags & IFF_LOOPBACK) {
2881 NL_SET_ERR_MSG(extack,
2882 "Egress device can not be loopback device for this route");
2883 goto out;
2884 }
232378e8
DA
2885
2886 /* if we did not check gw_addr above, do so now that the
2887 * egress device has been resolved.
2888 */
2889 if (need_addr_check &&
2890 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2891 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2892 goto out;
2893 }
2894
9fbb704c
DA
2895 err = 0;
2896out:
2897 return err;
2898}
2899
8d1c802b 2900static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
acb54e3c 2901 gfp_t gfp_flags,
333c4301 2902 struct netlink_ext_ack *extack)
1da177e4 2903{
5578689a 2904 struct net *net = cfg->fc_nlinfo.nl_net;
8d1c802b 2905 struct fib6_info *rt = NULL;
1da177e4
LT
2906 struct net_device *dev = NULL;
2907 struct inet6_dev *idev = NULL;
c71099ac 2908 struct fib6_table *table;
1da177e4 2909 int addr_type;
8c5b83f0 2910 int err = -EINVAL;
1da177e4 2911
557c44be 2912 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
2913 if (cfg->fc_flags & RTF_PCPU) {
2914 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 2915 goto out;
d5d531cb 2916 }
557c44be 2917
2ea2352e
WW
2918 /* RTF_CACHE is an internal flag; can not be set by userspace */
2919 if (cfg->fc_flags & RTF_CACHE) {
2920 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2921 goto out;
2922 }
2923
e8478e80
DA
2924 if (cfg->fc_type > RTN_MAX) {
2925 NL_SET_ERR_MSG(extack, "Invalid route type");
2926 goto out;
2927 }
2928
d5d531cb
DA
2929 if (cfg->fc_dst_len > 128) {
2930 NL_SET_ERR_MSG(extack, "Invalid prefix length");
2931 goto out;
2932 }
2933 if (cfg->fc_src_len > 128) {
2934 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 2935 goto out;
d5d531cb 2936 }
1da177e4 2937#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
2938 if (cfg->fc_src_len) {
2939 NL_SET_ERR_MSG(extack,
2940 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 2941 goto out;
d5d531cb 2942 }
1da177e4 2943#endif
86872cb5 2944 if (cfg->fc_ifindex) {
1da177e4 2945 err = -ENODEV;
5578689a 2946 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
2947 if (!dev)
2948 goto out;
2949 idev = in6_dev_get(dev);
2950 if (!idev)
2951 goto out;
2952 }
2953
86872cb5
TG
2954 if (cfg->fc_metric == 0)
2955 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 2956
fc1e64e1
DA
2957 if (cfg->fc_flags & RTNH_F_ONLINK) {
2958 if (!dev) {
2959 NL_SET_ERR_MSG(extack,
2960 "Nexthop device required for onlink");
2961 err = -ENODEV;
2962 goto out;
2963 }
2964
2965 if (!(dev->flags & IFF_UP)) {
2966 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2967 err = -ENETDOWN;
2968 goto out;
2969 }
2970 }
2971
d71314b4 2972 err = -ENOBUFS;
38308473
DM
2973 if (cfg->fc_nlinfo.nlh &&
2974 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 2975 table = fib6_get_table(net, cfg->fc_table);
38308473 2976 if (!table) {
f3213831 2977 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
2978 table = fib6_new_table(net, cfg->fc_table);
2979 }
2980 } else {
2981 table = fib6_new_table(net, cfg->fc_table);
2982 }
38308473
DM
2983
2984 if (!table)
c71099ac 2985 goto out;
c71099ac 2986
93531c67
DA
2987 err = -ENOMEM;
2988 rt = fib6_info_alloc(gfp_flags);
2989 if (!rt)
1da177e4 2990 goto out;
93531c67 2991
767a2217
DA
2992 rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len);
2993 if (IS_ERR(rt->fib6_metrics)) {
2994 err = PTR_ERR(rt->fib6_metrics);
fda21d46
ED
2995 /* Do not leave garbage there. */
2996 rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
767a2217
DA
2997 goto out;
2998 }
2999
93531c67
DA
3000 if (cfg->fc_flags & RTF_ADDRCONF)
3001 rt->dst_nocount = true;
1da177e4 3002
1716a961 3003 if (cfg->fc_flags & RTF_EXPIRES)
14895687 3004 fib6_set_expires(rt, jiffies +
1716a961
G
3005 clock_t_to_jiffies(cfg->fc_expires));
3006 else
14895687 3007 fib6_clean_expires(rt);
1da177e4 3008
86872cb5
TG
3009 if (cfg->fc_protocol == RTPROT_UNSPEC)
3010 cfg->fc_protocol = RTPROT_BOOT;
93c2fb25 3011 rt->fib6_protocol = cfg->fc_protocol;
86872cb5
TG
3012
3013 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4 3014
19e42e45
RP
3015 if (cfg->fc_encap) {
3016 struct lwtunnel_state *lwtstate;
3017
30357d7d 3018 err = lwtunnel_build_state(cfg->fc_encap_type,
127eb7cd 3019 cfg->fc_encap, AF_INET6, cfg,
9ae28727 3020 &lwtstate, extack);
19e42e45
RP
3021 if (err)
3022 goto out;
5e670d84 3023 rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
19e42e45
RP
3024 }
3025
93c2fb25
DA
3026 ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3027 rt->fib6_dst.plen = cfg->fc_dst_len;
3028 if (rt->fib6_dst.plen == 128)
3b6761d1 3029 rt->dst_host = true;
e5fd387a 3030
1da177e4 3031#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
3032 ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3033 rt->fib6_src.plen = cfg->fc_src_len;
1da177e4
LT
3034#endif
3035
93c2fb25 3036 rt->fib6_metric = cfg->fc_metric;
5e670d84 3037 rt->fib6_nh.nh_weight = 1;
1da177e4 3038
e8478e80 3039 rt->fib6_type = cfg->fc_type;
1da177e4
LT
3040
3041 /* We cannot add true routes via loopback here,
3042 they would result in kernel looping; promote them to reject routes
3043 */
86872cb5 3044 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
3045 (dev && (dev->flags & IFF_LOOPBACK) &&
3046 !(addr_type & IPV6_ADDR_LOOPBACK) &&
3047 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 3048 /* hold loopback dev/idev if we haven't done so. */
5578689a 3049 if (dev != net->loopback_dev) {
1da177e4
LT
3050 if (dev) {
3051 dev_put(dev);
3052 in6_dev_put(idev);
3053 }
5578689a 3054 dev = net->loopback_dev;
1da177e4
LT
3055 dev_hold(dev);
3056 idev = in6_dev_get(dev);
3057 if (!idev) {
3058 err = -ENODEV;
3059 goto out;
3060 }
3061 }
93c2fb25 3062 rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP;
1da177e4
LT
3063 goto install_route;
3064 }
3065
86872cb5 3066 if (cfg->fc_flags & RTF_GATEWAY) {
9fbb704c
DA
3067 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3068 if (err)
48ed7b26 3069 goto out;
1da177e4 3070
93531c67 3071 rt->fib6_nh.nh_gw = cfg->fc_gateway;
1da177e4
LT
3072 }
3073
3074 err = -ENODEV;
38308473 3075 if (!dev)
1da177e4
LT
3076 goto out;
3077
428604fb
LB
3078 if (idev->cnf.disable_ipv6) {
3079 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3080 err = -EACCES;
3081 goto out;
3082 }
3083
955ec4cb
DA
3084 if (!(dev->flags & IFF_UP)) {
3085 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3086 err = -ENETDOWN;
3087 goto out;
3088 }
3089
c3968a85
DW
3090 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3091 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 3092 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
3093 err = -EINVAL;
3094 goto out;
3095 }
93c2fb25
DA
3096 rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3097 rt->fib6_prefsrc.plen = 128;
c3968a85 3098 } else
93c2fb25 3099 rt->fib6_prefsrc.plen = 0;
c3968a85 3100
93c2fb25 3101 rt->fib6_flags = cfg->fc_flags;
1da177e4
LT
3102
3103install_route:
93c2fb25 3104 if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
5609b80a 3105 !netif_carrier_ok(dev))
5e670d84
DA
3106 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3107 rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
93531c67 3108 rt->fib6_nh.nh_dev = dev;
93c2fb25 3109 rt->fib6_table = table;
63152fc0 3110
dcd1f572
DA
3111 if (idev)
3112 in6_dev_put(idev);
3113
8c5b83f0 3114 return rt;
6b9ea5a6
RP
3115out:
3116 if (dev)
3117 dev_put(dev);
3118 if (idev)
3119 in6_dev_put(idev);
6b9ea5a6 3120
93531c67 3121 fib6_info_release(rt);
8c5b83f0 3122 return ERR_PTR(err);
6b9ea5a6
RP
3123}
3124
acb54e3c 3125int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
333c4301 3126 struct netlink_ext_ack *extack)
6b9ea5a6 3127{
8d1c802b 3128 struct fib6_info *rt;
6b9ea5a6
RP
3129 int err;
3130
acb54e3c 3131 rt = ip6_route_info_create(cfg, gfp_flags, extack);
d4ead6b3
DA
3132 if (IS_ERR(rt))
3133 return PTR_ERR(rt);
6b9ea5a6 3134
d4ead6b3 3135 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
93531c67 3136 fib6_info_release(rt);
6b9ea5a6 3137
1da177e4
LT
3138 return err;
3139}
3140
8d1c802b 3141static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
1da177e4 3142{
afb1d4b5 3143 struct net *net = info->nl_net;
c71099ac 3144 struct fib6_table *table;
afb1d4b5 3145 int err;
1da177e4 3146
421842ed 3147 if (rt == net->ipv6.fib6_null_entry) {
6825a26c
G
3148 err = -ENOENT;
3149 goto out;
3150 }
6c813a72 3151
93c2fb25 3152 table = rt->fib6_table;
66f5d6ce 3153 spin_lock_bh(&table->tb6_lock);
86872cb5 3154 err = fib6_del(rt, info);
66f5d6ce 3155 spin_unlock_bh(&table->tb6_lock);
1da177e4 3156
6825a26c 3157out:
93531c67 3158 fib6_info_release(rt);
1da177e4
LT
3159 return err;
3160}
3161
8d1c802b 3162int ip6_del_rt(struct net *net, struct fib6_info *rt)
e0a1ad73 3163{
afb1d4b5
DA
3164 struct nl_info info = { .nl_net = net };
3165
528c4ceb 3166 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
3167}
3168
8d1c802b 3169static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
0ae81335
DA
3170{
3171 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 3172 struct net *net = info->nl_net;
16a16cd3 3173 struct sk_buff *skb = NULL;
0ae81335 3174 struct fib6_table *table;
e3330039 3175 int err = -ENOENT;
0ae81335 3176
421842ed 3177 if (rt == net->ipv6.fib6_null_entry)
e3330039 3178 goto out_put;
93c2fb25 3179 table = rt->fib6_table;
66f5d6ce 3180 spin_lock_bh(&table->tb6_lock);
0ae81335 3181
93c2fb25 3182 if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
8d1c802b 3183 struct fib6_info *sibling, *next_sibling;
0ae81335 3184
16a16cd3
DA
3185 /* prefer to send a single notification with all hops */
3186 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3187 if (skb) {
3188 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3189
d4ead6b3 3190 if (rt6_fill_node(net, skb, rt, NULL,
16a16cd3
DA
3191 NULL, NULL, 0, RTM_DELROUTE,
3192 info->portid, seq, 0) < 0) {
3193 kfree_skb(skb);
3194 skb = NULL;
3195 } else
3196 info->skip_notify = 1;
3197 }
3198
0ae81335 3199 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25
DA
3200 &rt->fib6_siblings,
3201 fib6_siblings) {
0ae81335
DA
3202 err = fib6_del(sibling, info);
3203 if (err)
e3330039 3204 goto out_unlock;
0ae81335
DA
3205 }
3206 }
3207
3208 err = fib6_del(rt, info);
e3330039 3209out_unlock:
66f5d6ce 3210 spin_unlock_bh(&table->tb6_lock);
e3330039 3211out_put:
93531c67 3212 fib6_info_release(rt);
16a16cd3
DA
3213
3214 if (skb) {
e3330039 3215 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
3216 info->nlh, gfp_any());
3217 }
0ae81335
DA
3218 return err;
3219}
3220
23fb93a4
DA
3221static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3222{
3223 int rc = -ESRCH;
3224
3225 if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3226 goto out;
3227
3228 if (cfg->fc_flags & RTF_GATEWAY &&
3229 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3230 goto out;
761f6026
XL
3231
3232 rc = rt6_remove_exception_rt(rt);
23fb93a4
DA
3233out:
3234 return rc;
3235}
3236
333c4301
DA
3237static int ip6_route_del(struct fib6_config *cfg,
3238 struct netlink_ext_ack *extack)
1da177e4 3239{
8d1c802b 3240 struct rt6_info *rt_cache;
c71099ac 3241 struct fib6_table *table;
8d1c802b 3242 struct fib6_info *rt;
1da177e4 3243 struct fib6_node *fn;
1da177e4
LT
3244 int err = -ESRCH;
3245
5578689a 3246 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
3247 if (!table) {
3248 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 3249 return err;
d5d531cb 3250 }
c71099ac 3251
66f5d6ce 3252 rcu_read_lock();
1da177e4 3253
c71099ac 3254 fn = fib6_locate(&table->tb6_root,
86872cb5 3255 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 3256 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 3257 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 3258
1da177e4 3259 if (fn) {
66f5d6ce 3260 for_each_fib6_node_rt_rcu(fn) {
2b760fcf 3261 if (cfg->fc_flags & RTF_CACHE) {
23fb93a4
DA
3262 int rc;
3263
2b760fcf
WW
3264 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3265 &cfg->fc_src);
23fb93a4
DA
3266 if (rt_cache) {
3267 rc = ip6_del_cached_rt(rt_cache, cfg);
9e575010
ED
3268 if (rc != -ESRCH) {
3269 rcu_read_unlock();
23fb93a4 3270 return rc;
9e575010 3271 }
23fb93a4
DA
3272 }
3273 continue;
2b760fcf 3274 }
86872cb5 3275 if (cfg->fc_ifindex &&
5e670d84
DA
3276 (!rt->fib6_nh.nh_dev ||
3277 rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
1da177e4 3278 continue;
86872cb5 3279 if (cfg->fc_flags & RTF_GATEWAY &&
5e670d84 3280 !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
1da177e4 3281 continue;
93c2fb25 3282 if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
1da177e4 3283 continue;
93c2fb25 3284 if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
c2ed1880 3285 continue;
e873e4b9
WW
3286 if (!fib6_info_hold_safe(rt))
3287 continue;
66f5d6ce 3288 rcu_read_unlock();
1da177e4 3289
0ae81335
DA
3290 /* if gateway was specified only delete the one hop */
3291 if (cfg->fc_flags & RTF_GATEWAY)
3292 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3293
3294 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3295 }
3296 }
66f5d6ce 3297 rcu_read_unlock();
1da177e4
LT
3298
3299 return err;
3300}
3301
6700c270 3302static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 3303{
a6279458 3304 struct netevent_redirect netevent;
e8599ff4 3305 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
3306 struct ndisc_options ndopts;
3307 struct inet6_dev *in6_dev;
3308 struct neighbour *neigh;
a68886a6 3309 struct fib6_info *from;
71bcdba0 3310 struct rd_msg *msg;
6e157b6a
DM
3311 int optlen, on_link;
3312 u8 *lladdr;
e8599ff4 3313
29a3cad5 3314 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 3315 optlen -= sizeof(*msg);
e8599ff4
DM
3316
3317 if (optlen < 0) {
6e157b6a 3318 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
3319 return;
3320 }
3321
71bcdba0 3322 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 3323
71bcdba0 3324 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 3325 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
3326 return;
3327 }
3328
6e157b6a 3329 on_link = 0;
71bcdba0 3330 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 3331 on_link = 1;
71bcdba0 3332 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 3333 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 3334 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
3335 return;
3336 }
3337
3338 in6_dev = __in6_dev_get(skb->dev);
3339 if (!in6_dev)
3340 return;
3341 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3342 return;
3343
3344 /* RFC2461 8.1:
3345 * The IP source address of the Redirect MUST be the same as the current
3346 * first-hop router for the specified ICMP Destination Address.
3347 */
3348
f997c55c 3349 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
3350 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3351 return;
3352 }
6e157b6a
DM
3353
3354 lladdr = NULL;
e8599ff4
DM
3355 if (ndopts.nd_opts_tgt_lladdr) {
3356 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3357 skb->dev);
3358 if (!lladdr) {
3359 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3360 return;
3361 }
3362 }
3363
6e157b6a 3364 rt = (struct rt6_info *) dst;
ec13ad1d 3365 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 3366 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 3367 return;
6e157b6a 3368 }
e8599ff4 3369
6e157b6a
DM
3370 /* Redirect received -> path was valid.
3371 * Look, redirects are sent only in response to data packets,
3372 * so that this nexthop apparently is reachable. --ANK
3373 */
0dec879f 3374 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 3375
71bcdba0 3376 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
3377 if (!neigh)
3378 return;
a6279458 3379
1da177e4
LT
3380 /*
3381 * We have finally decided to accept it.
3382 */
3383
f997c55c 3384 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3385 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3386 NEIGH_UPDATE_F_OVERRIDE|
3387 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3388 NEIGH_UPDATE_F_ISROUTER)),
3389 NDISC_REDIRECT, &ndopts);
1da177e4 3390
4d85cd0c 3391 rcu_read_lock();
a68886a6 3392 from = rcu_dereference(rt->from);
e873e4b9
WW
3393 /* This fib6_info_hold() is safe here because we hold reference to rt
3394 * and rt already holds reference to fib6_info.
3395 */
8a14e46f 3396 fib6_info_hold(from);
4d85cd0c 3397 rcu_read_unlock();
8a14e46f
DA
3398
3399 nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
38308473 3400 if (!nrt)
1da177e4
LT
3401 goto out;
3402
3403 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3404 if (on_link)
3405 nrt->rt6i_flags &= ~RTF_GATEWAY;
3406
4e3fd7a0 3407 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3408
2b760fcf
WW
3409 /* No need to remove rt from the exception table if rt is
3410 * a cached route because rt6_insert_exception() will
3411 * takes care of it
3412 */
8a14e46f 3413 if (rt6_insert_exception(nrt, from)) {
2b760fcf
WW
3414 dst_release_immediate(&nrt->dst);
3415 goto out;
3416 }
1da177e4 3417
d8d1f30b
CG
3418 netevent.old = &rt->dst;
3419 netevent.new = &nrt->dst;
71bcdba0 3420 netevent.daddr = &msg->dest;
60592833 3421 netevent.neigh = neigh;
8d71740c
TT
3422 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3423
1da177e4 3424out:
8a14e46f 3425 fib6_info_release(from);
e8599ff4 3426 neigh_release(neigh);
6e157b6a
DM
3427}
3428
70ceb4f5 3429#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 3430static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 3431 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3432 const struct in6_addr *gwaddr,
3433 struct net_device *dev)
70ceb4f5 3434{
830218c1
DA
3435 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3436 int ifindex = dev->ifindex;
70ceb4f5 3437 struct fib6_node *fn;
8d1c802b 3438 struct fib6_info *rt = NULL;
c71099ac
TG
3439 struct fib6_table *table;
3440
830218c1 3441 table = fib6_get_table(net, tb_id);
38308473 3442 if (!table)
c71099ac 3443 return NULL;
70ceb4f5 3444
66f5d6ce 3445 rcu_read_lock();
38fbeeee 3446 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3447 if (!fn)
3448 goto out;
3449
66f5d6ce 3450 for_each_fib6_node_rt_rcu(fn) {
5e670d84 3451 if (rt->fib6_nh.nh_dev->ifindex != ifindex)
70ceb4f5 3452 continue;
93c2fb25 3453 if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
70ceb4f5 3454 continue;
5e670d84 3455 if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
70ceb4f5 3456 continue;
e873e4b9
WW
3457 if (!fib6_info_hold_safe(rt))
3458 continue;
70ceb4f5
YH
3459 break;
3460 }
3461out:
66f5d6ce 3462 rcu_read_unlock();
70ceb4f5
YH
3463 return rt;
3464}
3465
8d1c802b 3466static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 3467 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3468 const struct in6_addr *gwaddr,
3469 struct net_device *dev,
95c96174 3470 unsigned int pref)
70ceb4f5 3471{
86872cb5 3472 struct fib6_config cfg = {
238fc7ea 3473 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3474 .fc_ifindex = dev->ifindex,
86872cb5
TG
3475 .fc_dst_len = prefixlen,
3476 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3477 RTF_UP | RTF_PREF(pref),
b91d5329 3478 .fc_protocol = RTPROT_RA,
e8478e80 3479 .fc_type = RTN_UNICAST,
15e47304 3480 .fc_nlinfo.portid = 0,
efa2cea0
DL
3481 .fc_nlinfo.nlh = NULL,
3482 .fc_nlinfo.nl_net = net,
86872cb5
TG
3483 };
3484
830218c1 3485 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3486 cfg.fc_dst = *prefix;
3487 cfg.fc_gateway = *gwaddr;
70ceb4f5 3488
e317da96
YH
3489 /* We should treat it as a default route if prefix length is 0. */
3490 if (!prefixlen)
86872cb5 3491 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3492
acb54e3c 3493 ip6_route_add(&cfg, GFP_ATOMIC, NULL);
70ceb4f5 3494
830218c1 3495 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3496}
3497#endif
3498
8d1c802b 3499struct fib6_info *rt6_get_dflt_router(struct net *net,
afb1d4b5
DA
3500 const struct in6_addr *addr,
3501 struct net_device *dev)
1ab1457c 3502{
830218c1 3503 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
8d1c802b 3504 struct fib6_info *rt;
c71099ac 3505 struct fib6_table *table;
1da177e4 3506
afb1d4b5 3507 table = fib6_get_table(net, tb_id);
38308473 3508 if (!table)
c71099ac 3509 return NULL;
1da177e4 3510
66f5d6ce
WW
3511 rcu_read_lock();
3512 for_each_fib6_node_rt_rcu(&table->tb6_root) {
5e670d84 3513 if (dev == rt->fib6_nh.nh_dev &&
93c2fb25 3514 ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
5e670d84 3515 ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
1da177e4
LT
3516 break;
3517 }
e873e4b9
WW
3518 if (rt && !fib6_info_hold_safe(rt))
3519 rt = NULL;
66f5d6ce 3520 rcu_read_unlock();
1da177e4
LT
3521 return rt;
3522}
3523
8d1c802b 3524struct fib6_info *rt6_add_dflt_router(struct net *net,
afb1d4b5 3525 const struct in6_addr *gwaddr,
ebacaaa0
YH
3526 struct net_device *dev,
3527 unsigned int pref)
1da177e4 3528{
86872cb5 3529 struct fib6_config cfg = {
ca254490 3530 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3531 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3532 .fc_ifindex = dev->ifindex,
3533 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3534 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3535 .fc_protocol = RTPROT_RA,
e8478e80 3536 .fc_type = RTN_UNICAST,
15e47304 3537 .fc_nlinfo.portid = 0,
5578689a 3538 .fc_nlinfo.nlh = NULL,
afb1d4b5 3539 .fc_nlinfo.nl_net = net,
86872cb5 3540 };
1da177e4 3541
4e3fd7a0 3542 cfg.fc_gateway = *gwaddr;
1da177e4 3543
acb54e3c 3544 if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
830218c1
DA
3545 struct fib6_table *table;
3546
3547 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3548 if (table)
3549 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3550 }
1da177e4 3551
afb1d4b5 3552 return rt6_get_dflt_router(net, gwaddr, dev);
1da177e4
LT
3553}
3554
afb1d4b5
DA
3555static void __rt6_purge_dflt_routers(struct net *net,
3556 struct fib6_table *table)
1da177e4 3557{
8d1c802b 3558 struct fib6_info *rt;
1da177e4
LT
3559
3560restart:
66f5d6ce
WW
3561 rcu_read_lock();
3562 for_each_fib6_node_rt_rcu(&table->tb6_root) {
dcd1f572
DA
3563 struct net_device *dev = fib6_info_nh_dev(rt);
3564 struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3565
93c2fb25 3566 if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
e873e4b9
WW
3567 (!idev || idev->cnf.accept_ra != 2) &&
3568 fib6_info_hold_safe(rt)) {
93531c67
DA
3569 rcu_read_unlock();
3570 ip6_del_rt(net, rt);
1da177e4
LT
3571 goto restart;
3572 }
3573 }
66f5d6ce 3574 rcu_read_unlock();
830218c1
DA
3575
3576 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3577}
3578
3579void rt6_purge_dflt_routers(struct net *net)
3580{
3581 struct fib6_table *table;
3582 struct hlist_head *head;
3583 unsigned int h;
3584
3585 rcu_read_lock();
3586
3587 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3588 head = &net->ipv6.fib_table_hash[h];
3589 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3590 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
afb1d4b5 3591 __rt6_purge_dflt_routers(net, table);
830218c1
DA
3592 }
3593 }
3594
3595 rcu_read_unlock();
1da177e4
LT
3596}
3597
5578689a
DL
3598static void rtmsg_to_fib6_config(struct net *net,
3599 struct in6_rtmsg *rtmsg,
86872cb5
TG
3600 struct fib6_config *cfg)
3601{
8823a3ac
3602 *cfg = (struct fib6_config){
3603 .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3604 : RT6_TABLE_MAIN,
3605 .fc_ifindex = rtmsg->rtmsg_ifindex,
3606 .fc_metric = rtmsg->rtmsg_metric,
3607 .fc_expires = rtmsg->rtmsg_info,
3608 .fc_dst_len = rtmsg->rtmsg_dst_len,
3609 .fc_src_len = rtmsg->rtmsg_src_len,
3610 .fc_flags = rtmsg->rtmsg_flags,
3611 .fc_type = rtmsg->rtmsg_type,
3612
3613 .fc_nlinfo.nl_net = net,
3614
3615 .fc_dst = rtmsg->rtmsg_dst,
3616 .fc_src = rtmsg->rtmsg_src,
3617 .fc_gateway = rtmsg->rtmsg_gateway,
3618 };
86872cb5
TG
3619}
3620
5578689a 3621int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3622{
86872cb5 3623 struct fib6_config cfg;
1da177e4
LT
3624 struct in6_rtmsg rtmsg;
3625 int err;
3626
67ba4152 3627 switch (cmd) {
1da177e4
LT
3628 case SIOCADDRT: /* Add a route */
3629 case SIOCDELRT: /* Delete a route */
af31f412 3630 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3631 return -EPERM;
3632 err = copy_from_user(&rtmsg, arg,
3633 sizeof(struct in6_rtmsg));
3634 if (err)
3635 return -EFAULT;
86872cb5 3636
5578689a 3637 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3638
1da177e4
LT
3639 rtnl_lock();
3640 switch (cmd) {
3641 case SIOCADDRT:
acb54e3c 3642 err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
1da177e4
LT
3643 break;
3644 case SIOCDELRT:
333c4301 3645 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3646 break;
3647 default:
3648 err = -EINVAL;
3649 }
3650 rtnl_unlock();
3651
3652 return err;
3ff50b79 3653 }
1da177e4
LT
3654
3655 return -EINVAL;
3656}
3657
3658/*
3659 * Drop the packet on the floor
3660 */
3661
d5fdd6ba 3662static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3663{
612f09e8 3664 int type;
adf30907 3665 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3666 switch (ipstats_mib_noroutes) {
3667 case IPSTATS_MIB_INNOROUTES:
0660e03f 3668 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3669 if (type == IPV6_ADDR_ANY) {
bdb7cc64
SS
3670 IP6_INC_STATS(dev_net(dst->dev),
3671 __in6_dev_get_safely(skb->dev),
3bd653c8 3672 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3673 break;
3674 }
3675 /* FALLTHROUGH */
3676 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3677 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3678 ipstats_mib_noroutes);
612f09e8
YH
3679 break;
3680 }
3ffe533c 3681 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3682 kfree_skb(skb);
3683 return 0;
3684}
3685
9ce8ade0
TG
3686static int ip6_pkt_discard(struct sk_buff *skb)
3687{
612f09e8 3688 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3689}
3690
ede2059d 3691static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3692{
adf30907 3693 skb->dev = skb_dst(skb)->dev;
612f09e8 3694 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3695}
3696
9ce8ade0
TG
3697static int ip6_pkt_prohibit(struct sk_buff *skb)
3698{
612f09e8 3699 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3700}
3701
ede2059d 3702static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3703{
adf30907 3704 skb->dev = skb_dst(skb)->dev;
612f09e8 3705 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3706}
3707
1da177e4
LT
3708/*
3709 * Allocate a dst for local (unicast / anycast) address.
3710 */
3711
360a9887
DA
3712struct fib6_info *addrconf_f6i_alloc(struct net *net,
3713 struct inet6_dev *idev,
3714 const struct in6_addr *addr,
3715 bool anycast, gfp_t gfp_flags)
1da177e4 3716{
ca254490 3717 u32 tb_id;
4832c30d 3718 struct net_device *dev = idev->dev;
360a9887 3719 struct fib6_info *f6i;
5f02ce24 3720
360a9887
DA
3721 f6i = fib6_info_alloc(gfp_flags);
3722 if (!f6i)
1da177e4
LT
3723 return ERR_PTR(-ENOMEM);
3724
767a2217 3725 f6i->fib6_metrics = ip_fib_metrics_init(net, NULL, 0);
360a9887 3726 f6i->dst_nocount = true;
360a9887
DA
3727 f6i->dst_host = true;
3728 f6i->fib6_protocol = RTPROT_KERNEL;
3729 f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP;
e8478e80 3730 if (anycast) {
360a9887
DA
3731 f6i->fib6_type = RTN_ANYCAST;
3732 f6i->fib6_flags |= RTF_ANYCAST;
e8478e80 3733 } else {
360a9887
DA
3734 f6i->fib6_type = RTN_LOCAL;
3735 f6i->fib6_flags |= RTF_LOCAL;
e8478e80 3736 }
1da177e4 3737
360a9887 3738 f6i->fib6_nh.nh_gw = *addr;
93531c67 3739 dev_hold(dev);
360a9887
DA
3740 f6i->fib6_nh.nh_dev = dev;
3741 f6i->fib6_dst.addr = *addr;
3742 f6i->fib6_dst.plen = 128;
ca254490 3743 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
360a9887 3744 f6i->fib6_table = fib6_get_table(net, tb_id);
1da177e4 3745
360a9887 3746 return f6i;
1da177e4
LT
3747}
3748
c3968a85
DW
3749/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any device */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address compared against fib6_prefsrc */
};
3755
8d1c802b 3756static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
c3968a85
DW
3757{
3758 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3759 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3760 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3761
5e670d84 3762 if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
421842ed 3763 rt != net->ipv6.fib6_null_entry &&
93c2fb25 3764 ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
60006a48 3765 spin_lock_bh(&rt6_exception_lock);
c3968a85 3766 /* remove prefsrc entry */
93c2fb25 3767 rt->fib6_prefsrc.plen = 0;
60006a48 3768 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3769 }
3770 return 0;
3771}
3772
3773void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3774{
3775 struct net *net = dev_net(ifp->idev->dev);
3776 struct arg_dev_net_ip adni = {
3777 .dev = ifp->idev->dev,
3778 .net = net,
3779 .addr = &ifp->addr,
3780 };
0c3584d5 3781 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3782}
3783
/* Flag set a route must carry in full to be matched by fib6_clean_tohost(). */
#define RTF_RA_ROUTER		(RTF_GATEWAY | RTF_DEFAULT | RTF_ADDRCONF)
be7a010d
DJ
3785
3786/* Remove routers and update dst entries when a gateway turns into a host. */
8d1c802b 3787static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
be7a010d
DJ
3788{
3789 struct in6_addr *gateway = (struct in6_addr *)arg;
3790
93c2fb25 3791 if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
5e670d84 3792 ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
be7a010d
DJ
3793 return -1;
3794 }
b16cb459
WW
3795
3796 /* Further clean up cached routes in exception table.
3797 * This is needed because cached route may have a different
3798 * gateway than its 'parent' in the case of an ip redirect.
3799 */
3800 rt6_exceptions_clean_tohost(rt, gateway);
3801
be7a010d
DJ
3802 return 0;
3803}
3804
3805void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3806{
3807 fib6_clean_all(net, fib6_clean_tohost, gateway);
3808}
3809
2127d95a
IS
/* Argument block handed to the fib6_ifup()/fib6_ifdown() walkers. */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event is about */
	union {
		unsigned int nh_flags;	/* read by fib6_ifup(): flags to clear */
		unsigned long event;	/* read by fib6_ifdown(): NETDEV_* event */
	};
};
3817
8d1c802b 3818static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
d7dedee1 3819{
8d1c802b 3820 struct fib6_info *iter;
d7dedee1
IS
3821 struct fib6_node *fn;
3822
93c2fb25
DA
3823 fn = rcu_dereference_protected(rt->fib6_node,
3824 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3825 iter = rcu_dereference_protected(fn->leaf,
93c2fb25 3826 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3827 while (iter) {
93c2fb25 3828 if (iter->fib6_metric == rt->fib6_metric &&
33bd5ac5 3829 rt6_qualify_for_ecmp(iter))
d7dedee1 3830 return iter;
8fb11a9a 3831 iter = rcu_dereference_protected(iter->fib6_next,
93c2fb25 3832 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1
IS
3833 }
3834
3835 return NULL;
3836}
3837
8d1c802b 3838static bool rt6_is_dead(const struct fib6_info *rt)
d7dedee1 3839{
5e670d84
DA
3840 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
3841 (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
dcd1f572 3842 fib6_ignore_linkdown(rt)))
d7dedee1
IS
3843 return true;
3844
3845 return false;
3846}
3847
8d1c802b 3848static int rt6_multipath_total_weight(const struct fib6_info *rt)
d7dedee1 3849{
8d1c802b 3850 struct fib6_info *iter;
d7dedee1
IS
3851 int total = 0;
3852
3853 if (!rt6_is_dead(rt))
5e670d84 3854 total += rt->fib6_nh.nh_weight;
d7dedee1 3855
93c2fb25 3856 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
d7dedee1 3857 if (!rt6_is_dead(iter))
5e670d84 3858 total += iter->fib6_nh.nh_weight;
d7dedee1
IS
3859 }
3860
3861 return total;
3862}
3863
8d1c802b 3864static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
d7dedee1
IS
3865{
3866 int upper_bound = -1;
3867
3868 if (!rt6_is_dead(rt)) {
5e670d84 3869 *weight += rt->fib6_nh.nh_weight;
d7dedee1
IS
3870 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3871 total) - 1;
3872 }
5e670d84 3873 atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
d7dedee1
IS
3874}
3875
8d1c802b 3876static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
d7dedee1 3877{
8d1c802b 3878 struct fib6_info *iter;
d7dedee1
IS
3879 int weight = 0;
3880
3881 rt6_upper_bound_set(rt, &weight, total);
3882
93c2fb25 3883 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
d7dedee1
IS
3884 rt6_upper_bound_set(iter, &weight, total);
3885}
3886
8d1c802b 3887void rt6_multipath_rebalance(struct fib6_info *rt)
d7dedee1 3888{
8d1c802b 3889 struct fib6_info *first;
d7dedee1
IS
3890 int total;
3891
3892 /* In case the entire multipath route was marked for flushing,
3893 * then there is no need to rebalance upon the removal of every
3894 * sibling route.
3895 */
93c2fb25 3896 if (!rt->fib6_nsiblings || rt->should_flush)
d7dedee1
IS
3897 return;
3898
3899 /* During lookup routes are evaluated in order, so we need to
3900 * make sure upper bounds are assigned from the first sibling
3901 * onwards.
3902 */
3903 first = rt6_multipath_first_sibling(rt);
3904 if (WARN_ON_ONCE(!first))
3905 return;
3906
3907 total = rt6_multipath_total_weight(first);
3908 rt6_multipath_upper_bound_set(first, total);
3909}
3910
8d1c802b 3911static int fib6_ifup(struct fib6_info *rt, void *p_arg)
2127d95a
IS
3912{
3913 const struct arg_netdev_event *arg = p_arg;
7aef6859 3914 struct net *net = dev_net(arg->dev);
2127d95a 3915
421842ed 3916 if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
5e670d84 3917 rt->fib6_nh.nh_flags &= ~arg->nh_flags;
7aef6859 3918 fib6_update_sernum_upto_root(net, rt);
d7dedee1 3919 rt6_multipath_rebalance(rt);
1de178ed 3920 }
2127d95a
IS
3921
3922 return 0;
3923}
3924
3925void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3926{
3927 struct arg_netdev_event arg = {
3928 .dev = dev,
6802f3ad
IS
3929 {
3930 .nh_flags = nh_flags,
3931 },
2127d95a
IS
3932 };
3933
3934 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3935 arg.nh_flags |= RTNH_F_LINKDOWN;
3936
3937 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3938}
3939
8d1c802b 3940static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
1de178ed
IS
3941 const struct net_device *dev)
3942{
8d1c802b 3943 struct fib6_info *iter;
1de178ed 3944
5e670d84 3945 if (rt->fib6_nh.nh_dev == dev)
1de178ed 3946 return true;
93c2fb25 3947 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84 3948 if (iter->fib6_nh.nh_dev == dev)
1de178ed
IS
3949 return true;
3950
3951 return false;
3952}
3953
8d1c802b 3954static void rt6_multipath_flush(struct fib6_info *rt)
1de178ed 3955{
8d1c802b 3956 struct fib6_info *iter;
1de178ed
IS
3957
3958 rt->should_flush = 1;
93c2fb25 3959 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
1de178ed
IS
3960 iter->should_flush = 1;
3961}
3962
8d1c802b 3963static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
1de178ed
IS
3964 const struct net_device *down_dev)
3965{
8d1c802b 3966 struct fib6_info *iter;
1de178ed
IS
3967 unsigned int dead = 0;
3968
5e670d84
DA
3969 if (rt->fib6_nh.nh_dev == down_dev ||
3970 rt->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed 3971 dead++;
93c2fb25 3972 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84
DA
3973 if (iter->fib6_nh.nh_dev == down_dev ||
3974 iter->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed
IS
3975 dead++;
3976
3977 return dead;
3978}
3979
8d1c802b 3980static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
1de178ed
IS
3981 const struct net_device *dev,
3982 unsigned int nh_flags)
3983{
8d1c802b 3984 struct fib6_info *iter;
1de178ed 3985
5e670d84
DA
3986 if (rt->fib6_nh.nh_dev == dev)
3987 rt->fib6_nh.nh_flags |= nh_flags;
93c2fb25 3988 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84
DA
3989 if (iter->fib6_nh.nh_dev == dev)
3990 iter->fib6_nh.nh_flags |= nh_flags;
1de178ed
IS
3991}
3992
a1a22c12 3993/* called with write lock held for table with rt */
8d1c802b 3994static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
1da177e4 3995{
4c981e28
IS
3996 const struct arg_netdev_event *arg = p_arg;
3997 const struct net_device *dev = arg->dev;
7aef6859 3998 struct net *net = dev_net(dev);
8ed67789 3999
421842ed 4000 if (rt == net->ipv6.fib6_null_entry)
27c6fa73
IS
4001 return 0;
4002
4003 switch (arg->event) {
4004 case NETDEV_UNREGISTER:
5e670d84 4005 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
27c6fa73 4006 case NETDEV_DOWN:
1de178ed 4007 if (rt->should_flush)
27c6fa73 4008 return -1;
93c2fb25 4009 if (!rt->fib6_nsiblings)
5e670d84 4010 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
1de178ed
IS
4011 if (rt6_multipath_uses_dev(rt, dev)) {
4012 unsigned int count;
4013
4014 count = rt6_multipath_dead_count(rt, dev);
93c2fb25 4015 if (rt->fib6_nsiblings + 1 == count) {
1de178ed
IS
4016 rt6_multipath_flush(rt);
4017 return -1;
4018 }
4019 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4020 RTNH_F_LINKDOWN);
7aef6859 4021 fib6_update_sernum(net, rt);
d7dedee1 4022 rt6_multipath_rebalance(rt);
1de178ed
IS
4023 }
4024 return -2;
27c6fa73 4025 case NETDEV_CHANGE:
5e670d84 4026 if (rt->fib6_nh.nh_dev != dev ||
93c2fb25 4027 rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73 4028 break;
5e670d84 4029 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 4030 rt6_multipath_rebalance(rt);
27c6fa73 4031 break;
2b241361 4032 }
c159d30c 4033
1da177e4
LT
4034 return 0;
4035}
4036
27c6fa73 4037void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 4038{
4c981e28 4039 struct arg_netdev_event arg = {
8ed67789 4040 .dev = dev,
6802f3ad
IS
4041 {
4042 .event = event,
4043 },
8ed67789 4044 };
7c6bb7d2 4045 struct net *net = dev_net(dev);
8ed67789 4046
7c6bb7d2
DA
4047 if (net->ipv6.sysctl.skip_notify_on_dev_down)
4048 fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
4049 else
4050 fib6_clean_all(net, fib6_ifdown, &arg);
4c981e28
IS
4051}
4052
4053void rt6_disable_ip(struct net_device *dev, unsigned long event)
4054{
4055 rt6_sync_down_dev(dev, event);
4056 rt6_uncached_list_flush_dev(dev_net(dev), dev);
4057 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
4058}
4059
/* Argument bundle passed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned int		mtu;	/* the new MTU */
};
4064
8d1c802b 4065static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4066{
4067 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4068 struct inet6_dev *idev;
4069
4070 /* In IPv6 pmtu discovery is not optional,
4071 so that RTAX_MTU lock cannot disable it.
4072 We still use this lock to block changes
4073 caused by addrconf/ndisc.
4074 */
4075
4076 idev = __in6_dev_get(arg->dev);
38308473 4077 if (!idev)
1da177e4
LT
4078 return 0;
4079
4080 /* For administrative MTU increase, there is no way to discover
4081 IPv6 PMTU increase, so PMTU increase should be updated here.
4082 Since RFC 1981 doesn't include administrative MTU increase
4083 update PMTU increase is a MUST. (i.e. jumbo frame)
4084 */
5e670d84 4085 if (rt->fib6_nh.nh_dev == arg->dev &&
d4ead6b3
DA
4086 !fib6_metric_locked(rt, RTAX_MTU)) {
4087 u32 mtu = rt->fib6_pmtu;
4088
4089 if (mtu >= arg->mtu ||
4090 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4091 fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4092
f5bbe7ee 4093 spin_lock_bh(&rt6_exception_lock);
e9fa1495 4094 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
f5bbe7ee 4095 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 4096 }
1da177e4
LT
4097 return 0;
4098}
4099
95c96174 4100void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 4101{
c71099ac
TG
4102 struct rt6_mtu_change_arg arg = {
4103 .dev = dev,
4104 .mtu = mtu,
4105 };
1da177e4 4106
0c3584d5 4107 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
4108}
4109
ef7c79ed 4110static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 4111 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
aa8f8778 4112 [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
86872cb5 4113 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 4114 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
4115 [RTA_PRIORITY] = { .type = NLA_U32 },
4116 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 4117 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 4118 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
4119 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4120 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 4121 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 4122 [RTA_UID] = { .type = NLA_U32 },
3b45a410 4123 [RTA_MARK] = { .type = NLA_U32 },
aa8f8778 4124 [RTA_TABLE] = { .type = NLA_U32 },
eacb9384
RP
4125 [RTA_IP_PROTO] = { .type = NLA_U8 },
4126 [RTA_SPORT] = { .type = NLA_U16 },
4127 [RTA_DPORT] = { .type = NLA_U16 },
86872cb5
TG
4128};
4129
4130static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
4131 struct fib6_config *cfg,
4132 struct netlink_ext_ack *extack)
1da177e4 4133{
86872cb5
TG
4134 struct rtmsg *rtm;
4135 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 4136 unsigned int pref;
86872cb5 4137 int err;
1da177e4 4138
fceb6435 4139 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
dac9c979 4140 extack);
86872cb5
TG
4141 if (err < 0)
4142 goto errout;
1da177e4 4143
86872cb5
TG
4144 err = -EINVAL;
4145 rtm = nlmsg_data(nlh);
86872cb5 4146
84db8407
4147 *cfg = (struct fib6_config){
4148 .fc_table = rtm->rtm_table,
4149 .fc_dst_len = rtm->rtm_dst_len,
4150 .fc_src_len = rtm->rtm_src_len,
4151 .fc_flags = RTF_UP,
4152 .fc_protocol = rtm->rtm_protocol,
4153 .fc_type = rtm->rtm_type,
4154
4155 .fc_nlinfo.portid = NETLINK_CB(skb).portid,
4156 .fc_nlinfo.nlh = nlh,
4157 .fc_nlinfo.nl_net = sock_net(skb->sk),
4158 };
86872cb5 4159
ef2c7d7b
ND
4160 if (rtm->rtm_type == RTN_UNREACHABLE ||
4161 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
4162 rtm->rtm_type == RTN_PROHIBIT ||
4163 rtm->rtm_type == RTN_THROW)
86872cb5
TG
4164 cfg->fc_flags |= RTF_REJECT;
4165
ab79ad14
4166 if (rtm->rtm_type == RTN_LOCAL)
4167 cfg->fc_flags |= RTF_LOCAL;
4168
1f56a01f
MKL
4169 if (rtm->rtm_flags & RTM_F_CLONED)
4170 cfg->fc_flags |= RTF_CACHE;
4171
fc1e64e1
DA
4172 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4173
86872cb5 4174 if (tb[RTA_GATEWAY]) {
67b61f6c 4175 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 4176 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 4177 }
30b9819b
DA
4178 if (tb[RTA_VIA]) {
4179 NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
4180 goto errout;
4181 }
86872cb5
TG
4182
4183 if (tb[RTA_DST]) {
4184 int plen = (rtm->rtm_dst_len + 7) >> 3;
4185
4186 if (nla_len(tb[RTA_DST]) < plen)
4187 goto errout;
4188
4189 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 4190 }
86872cb5
TG
4191
4192 if (tb[RTA_SRC]) {
4193 int plen = (rtm->rtm_src_len + 7) >> 3;
4194
4195 if (nla_len(tb[RTA_SRC]) < plen)
4196 goto errout;
4197
4198 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 4199 }
86872cb5 4200
c3968a85 4201 if (tb[RTA_PREFSRC])
67b61f6c 4202 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 4203
86872cb5
TG
4204 if (tb[RTA_OIF])
4205 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4206
4207 if (tb[RTA_PRIORITY])
4208 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4209
4210 if (tb[RTA_METRICS]) {
4211 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4212 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 4213 }
86872cb5
TG
4214
4215 if (tb[RTA_TABLE])
4216 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4217
51ebd318
ND
4218 if (tb[RTA_MULTIPATH]) {
4219 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4220 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
4221
4222 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 4223 cfg->fc_mp_len, extack);
9ed59592
DA
4224 if (err < 0)
4225 goto errout;
51ebd318
ND
4226 }
4227
c78ba6d6
LR
4228 if (tb[RTA_PREF]) {
4229 pref = nla_get_u8(tb[RTA_PREF]);
4230 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4231 pref != ICMPV6_ROUTER_PREF_HIGH)
4232 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4233 cfg->fc_flags |= RTF_PREF(pref);
4234 }
4235
19e42e45
RP
4236 if (tb[RTA_ENCAP])
4237 cfg->fc_encap = tb[RTA_ENCAP];
4238
9ed59592 4239 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
4240 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4241
c255bd68 4242 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
4243 if (err < 0)
4244 goto errout;
4245 }
4246
32bc201e
XL
4247 if (tb[RTA_EXPIRES]) {
4248 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4249
4250 if (addrconf_finite_timeout(timeout)) {
4251 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4252 cfg->fc_flags |= RTF_EXPIRES;
4253 }
4254 }
4255
86872cb5
TG
4256 err = 0;
4257errout:
4258 return err;
1da177e4
LT
4259}
4260
6b9ea5a6 4261struct rt6_nh {
8d1c802b 4262 struct fib6_info *fib6_info;
6b9ea5a6 4263 struct fib6_config r_cfg;
6b9ea5a6
RP
4264 struct list_head next;
4265};
4266
4267static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4268{
4269 struct rt6_nh *nh;
4270
4271 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 4272 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
4273 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4274 nh->r_cfg.fc_ifindex);
4275 }
4276}
4277
d4ead6b3
DA
4278static int ip6_route_info_append(struct net *net,
4279 struct list_head *rt6_nh_list,
8d1c802b
DA
4280 struct fib6_info *rt,
4281 struct fib6_config *r_cfg)
6b9ea5a6
RP
4282{
4283 struct rt6_nh *nh;
6b9ea5a6
RP
4284 int err = -EEXIST;
4285
4286 list_for_each_entry(nh, rt6_nh_list, next) {
8d1c802b
DA
4287 /* check if fib6_info already exists */
4288 if (rt6_duplicate_nexthop(nh->fib6_info, rt))
6b9ea5a6
RP
4289 return err;
4290 }
4291
4292 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4293 if (!nh)
4294 return -ENOMEM;
8d1c802b 4295 nh->fib6_info = rt;
6b9ea5a6
RP
4296 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4297 list_add_tail(&nh->next, rt6_nh_list);
4298
4299 return 0;
4300}
4301
8d1c802b
DA
4302static void ip6_route_mpath_notify(struct fib6_info *rt,
4303 struct fib6_info *rt_last,
3b1137fe
DA
4304 struct nl_info *info,
4305 __u16 nlflags)
4306{
4307 /* if this is an APPEND route, then rt points to the first route
4308 * inserted and rt_last points to last route inserted. Userspace
4309 * wants a consistent dump of the route which starts at the first
4310 * nexthop. Since sibling routes are always added at the end of
4311 * the list, find the first sibling of the last route appended
4312 */
93c2fb25
DA
4313 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4314 rt = list_first_entry(&rt_last->fib6_siblings,
8d1c802b 4315 struct fib6_info,
93c2fb25 4316 fib6_siblings);
3b1137fe
DA
4317 }
4318
4319 if (rt)
4320 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4321}
4322
333c4301
DA
4323static int ip6_route_multipath_add(struct fib6_config *cfg,
4324 struct netlink_ext_ack *extack)
51ebd318 4325{
8d1c802b 4326 struct fib6_info *rt_notif = NULL, *rt_last = NULL;
3b1137fe 4327 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4328 struct fib6_config r_cfg;
4329 struct rtnexthop *rtnh;
8d1c802b 4330 struct fib6_info *rt;
6b9ea5a6
RP
4331 struct rt6_nh *err_nh;
4332 struct rt6_nh *nh, *nh_safe;
3b1137fe 4333 __u16 nlflags;
51ebd318
ND
4334 int remaining;
4335 int attrlen;
6b9ea5a6
RP
4336 int err = 1;
4337 int nhn = 0;
4338 int replace = (cfg->fc_nlinfo.nlh &&
4339 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4340 LIST_HEAD(rt6_nh_list);
51ebd318 4341
3b1137fe
DA
4342 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4343 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4344 nlflags |= NLM_F_APPEND;
4345
35f1b4e9 4346 remaining = cfg->fc_mp_len;
51ebd318 4347 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4348
6b9ea5a6 4349 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
8d1c802b 4350 * fib6_info structs per nexthop
6b9ea5a6 4351 */
51ebd318
ND
4352 while (rtnh_ok(rtnh, remaining)) {
4353 memcpy(&r_cfg, cfg, sizeof(*cfg));
4354 if (rtnh->rtnh_ifindex)
4355 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4356
4357 attrlen = rtnh_attrlen(rtnh);
4358 if (attrlen > 0) {
4359 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4360
4361 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4362 if (nla) {
67b61f6c 4363 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4364 r_cfg.fc_flags |= RTF_GATEWAY;
4365 }
19e42e45
RP
4366 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4367 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4368 if (nla)
4369 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4370 }
6b9ea5a6 4371
68e2ffde 4372 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
acb54e3c 4373 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
8c5b83f0
RP
4374 if (IS_ERR(rt)) {
4375 err = PTR_ERR(rt);
4376 rt = NULL;
6b9ea5a6 4377 goto cleanup;
8c5b83f0 4378 }
b5d2d75e
DA
4379 if (!rt6_qualify_for_ecmp(rt)) {
4380 err = -EINVAL;
4381 NL_SET_ERR_MSG(extack,
4382 "Device only routes can not be added for IPv6 using the multipath API.");
4383 fib6_info_release(rt);
4384 goto cleanup;
4385 }
6b9ea5a6 4386
5e670d84 4387 rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
398958ae 4388
d4ead6b3
DA
4389 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4390 rt, &r_cfg);
51ebd318 4391 if (err) {
93531c67 4392 fib6_info_release(rt);
6b9ea5a6
RP
4393 goto cleanup;
4394 }
4395
4396 rtnh = rtnh_next(rtnh, &remaining);
4397 }
4398
3b1137fe
DA
4399 /* for add and replace send one notification with all nexthops.
4400 * Skip the notification in fib6_add_rt2node and send one with
4401 * the full route when done
4402 */
4403 info->skip_notify = 1;
4404
6b9ea5a6
RP
4405 err_nh = NULL;
4406 list_for_each_entry(nh, &rt6_nh_list, next) {
8d1c802b
DA
4407 err = __ip6_ins_rt(nh->fib6_info, info, extack);
4408 fib6_info_release(nh->fib6_info);
93531c67 4409
f7225172
DA
4410 if (!err) {
4411 /* save reference to last route successfully inserted */
4412 rt_last = nh->fib6_info;
4413
4414 /* save reference to first route for notification */
4415 if (!rt_notif)
4416 rt_notif = nh->fib6_info;
4417 }
3b1137fe 4418
8d1c802b
DA
4419 /* nh->fib6_info is used or freed at this point, reset to NULL*/
4420 nh->fib6_info = NULL;
6b9ea5a6
RP
4421 if (err) {
4422 if (replace && nhn)
4423 ip6_print_replace_route_err(&rt6_nh_list);
4424 err_nh = nh;
4425 goto add_errout;
51ebd318 4426 }
6b9ea5a6 4427
1a72418b 4428 /* Because each route is added like a single route we remove
27596472
MK
4429 * these flags after the first nexthop: if there is a collision,
4430 * we have already failed to add the first nexthop:
4431 * fib6_add_rt2node() has rejected it; when replacing, old
4432 * nexthops have been replaced by first new, the rest should
4433 * be added to it.
1a72418b 4434 */
27596472
MK
4435 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4436 NLM_F_REPLACE);
6b9ea5a6
RP
4437 nhn++;
4438 }
4439
3b1137fe
DA
4440 /* success ... tell user about new route */
4441 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4442 goto cleanup;
4443
4444add_errout:
3b1137fe
DA
4445 /* send notification for routes that were added so that
4446 * the delete notifications sent by ip6_route_del are
4447 * coherent
4448 */
4449 if (rt_notif)
4450 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4451
6b9ea5a6
RP
4452 /* Delete routes that were already added */
4453 list_for_each_entry(nh, &rt6_nh_list, next) {
4454 if (err_nh == nh)
4455 break;
333c4301 4456 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4457 }
4458
4459cleanup:
4460 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
8d1c802b
DA
4461 if (nh->fib6_info)
4462 fib6_info_release(nh->fib6_info);
6b9ea5a6
RP
4463 list_del(&nh->next);
4464 kfree(nh);
4465 }
4466
4467 return err;
4468}
4469
333c4301
DA
4470static int ip6_route_multipath_del(struct fib6_config *cfg,
4471 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4472{
4473 struct fib6_config r_cfg;
4474 struct rtnexthop *rtnh;
4475 int remaining;
4476 int attrlen;
4477 int err = 1, last_err = 0;
4478
4479 remaining = cfg->fc_mp_len;
4480 rtnh = (struct rtnexthop *)cfg->fc_mp;
4481
4482 /* Parse a Multipath Entry */
4483 while (rtnh_ok(rtnh, remaining)) {
4484 memcpy(&r_cfg, cfg, sizeof(*cfg));
4485 if (rtnh->rtnh_ifindex)
4486 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4487
4488 attrlen = rtnh_attrlen(rtnh);
4489 if (attrlen > 0) {
4490 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4491
4492 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4493 if (nla) {
4494 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4495 r_cfg.fc_flags |= RTF_GATEWAY;
4496 }
4497 }
333c4301 4498 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4499 if (err)
4500 last_err = err;
4501
51ebd318
ND
4502 rtnh = rtnh_next(rtnh, &remaining);
4503 }
4504
4505 return last_err;
4506}
4507
c21ef3e3
DA
4508static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4509 struct netlink_ext_ack *extack)
1da177e4 4510{
86872cb5
TG
4511 struct fib6_config cfg;
4512 int err;
1da177e4 4513
333c4301 4514 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4515 if (err < 0)
4516 return err;
4517
51ebd318 4518 if (cfg.fc_mp)
333c4301 4519 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4520 else {
4521 cfg.fc_delete_all_nh = 1;
333c4301 4522 return ip6_route_del(&cfg, extack);
0ae81335 4523 }
1da177e4
LT
4524}
4525
c21ef3e3
DA
4526static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4527 struct netlink_ext_ack *extack)
1da177e4 4528{
86872cb5
TG
4529 struct fib6_config cfg;
4530 int err;
1da177e4 4531
333c4301 4532 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4533 if (err < 0)
4534 return err;
4535
51ebd318 4536 if (cfg.fc_mp)
333c4301 4537 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4538 else
acb54e3c 4539 return ip6_route_add(&cfg, GFP_KERNEL, extack);
1da177e4
LT
4540}
4541
8d1c802b 4542static size_t rt6_nlmsg_size(struct fib6_info *rt)
339bf98f 4543{
beb1afac
DA
4544 int nexthop_len = 0;
4545
93c2fb25 4546 if (rt->fib6_nsiblings) {
beb1afac
DA
4547 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4548 + NLA_ALIGN(sizeof(struct rtnexthop))
4549 + nla_total_size(16) /* RTA_GATEWAY */
5e670d84 4550 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
beb1afac 4551
93c2fb25 4552 nexthop_len *= rt->fib6_nsiblings;
beb1afac
DA
4553 }
4554
339bf98f
TG
4555 return NLMSG_ALIGN(sizeof(struct rtmsg))
4556 + nla_total_size(16) /* RTA_SRC */
4557 + nla_total_size(16) /* RTA_DST */
4558 + nla_total_size(16) /* RTA_GATEWAY */
4559 + nla_total_size(16) /* RTA_PREFSRC */
4560 + nla_total_size(4) /* RTA_TABLE */
4561 + nla_total_size(4) /* RTA_IIF */
4562 + nla_total_size(4) /* RTA_OIF */
4563 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4564 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4565 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4566 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4567 + nla_total_size(1) /* RTA_PREF */
5e670d84 4568 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
beb1afac
DA
4569 + nexthop_len;
4570}
4571
8d1c802b 4572static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
5be083ce 4573 unsigned int *flags, bool skip_oif)
beb1afac 4574{
5e670d84 4575 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
f9d882ea
IS
4576 *flags |= RTNH_F_DEAD;
4577
5e670d84 4578 if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
beb1afac 4579 *flags |= RTNH_F_LINKDOWN;
dcd1f572
DA
4580
4581 rcu_read_lock();
4582 if (fib6_ignore_linkdown(rt))
beb1afac 4583 *flags |= RTNH_F_DEAD;
dcd1f572 4584 rcu_read_unlock();
beb1afac
DA
4585 }
4586
93c2fb25 4587 if (rt->fib6_flags & RTF_GATEWAY) {
5e670d84 4588 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
beb1afac
DA
4589 goto nla_put_failure;
4590 }
4591
5e670d84
DA
4592 *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
4593 if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
4594 *flags |= RTNH_F_OFFLOAD;
4595
5be083ce 4596 /* not needed for multipath encoding b/c it has a rtnexthop struct */
5e670d84
DA
4597 if (!skip_oif && rt->fib6_nh.nh_dev &&
4598 nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
beb1afac
DA
4599 goto nla_put_failure;
4600
5e670d84
DA
4601 if (rt->fib6_nh.nh_lwtstate &&
4602 lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
beb1afac
DA
4603 goto nla_put_failure;
4604
4605 return 0;
4606
4607nla_put_failure:
4608 return -EMSGSIZE;
4609}
4610
5be083ce 4611/* add multipath next hop */
8d1c802b 4612static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
beb1afac 4613{
5e670d84 4614 const struct net_device *dev = rt->fib6_nh.nh_dev;
beb1afac
DA
4615 struct rtnexthop *rtnh;
4616 unsigned int flags = 0;
4617
4618 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4619 if (!rtnh)
4620 goto nla_put_failure;
4621
5e670d84
DA
4622 rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
4623 rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
beb1afac 4624
5be083ce 4625 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
4626 goto nla_put_failure;
4627
4628 rtnh->rtnh_flags = flags;
4629
4630 /* length of rtnetlink header + attributes */
4631 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4632
4633 return 0;
4634
4635nla_put_failure:
4636 return -EMSGSIZE;
339bf98f
TG
4637}
4638
d4ead6b3 4639static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 4640 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 4641 struct in6_addr *dest, struct in6_addr *src,
15e47304 4642 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4643 unsigned int flags)
1da177e4 4644{
22d0bd82
XL
4645 struct rt6_info *rt6 = (struct rt6_info *)dst;
4646 struct rt6key *rt6_dst, *rt6_src;
4647 u32 *pmetrics, table, rt6_flags;
2d7202bf 4648 struct nlmsghdr *nlh;
22d0bd82 4649 struct rtmsg *rtm;
d4ead6b3 4650 long expires = 0;
1da177e4 4651
15e47304 4652 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4653 if (!nlh)
26932566 4654 return -EMSGSIZE;
2d7202bf 4655
22d0bd82
XL
4656 if (rt6) {
4657 rt6_dst = &rt6->rt6i_dst;
4658 rt6_src = &rt6->rt6i_src;
4659 rt6_flags = rt6->rt6i_flags;
4660 } else {
4661 rt6_dst = &rt->fib6_dst;
4662 rt6_src = &rt->fib6_src;
4663 rt6_flags = rt->fib6_flags;
4664 }
4665
2d7202bf 4666 rtm = nlmsg_data(nlh);
1da177e4 4667 rtm->rtm_family = AF_INET6;
22d0bd82
XL
4668 rtm->rtm_dst_len = rt6_dst->plen;
4669 rtm->rtm_src_len = rt6_src->plen;
1da177e4 4670 rtm->rtm_tos = 0;
93c2fb25
DA
4671 if (rt->fib6_table)
4672 table = rt->fib6_table->tb6_id;
c71099ac 4673 else
9e762a4a 4674 table = RT6_TABLE_UNSPEC;
53ad6977 4675 rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
c78679e8
DM
4676 if (nla_put_u32(skb, RTA_TABLE, table))
4677 goto nla_put_failure;
e8478e80
DA
4678
4679 rtm->rtm_type = rt->fib6_type;
1da177e4
LT
4680 rtm->rtm_flags = 0;
4681 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
93c2fb25 4682 rtm->rtm_protocol = rt->fib6_protocol;
1da177e4 4683
22d0bd82 4684 if (rt6_flags & RTF_CACHE)
1da177e4
LT
4685 rtm->rtm_flags |= RTM_F_CLONED;
4686
d4ead6b3
DA
4687 if (dest) {
4688 if (nla_put_in6_addr(skb, RTA_DST, dest))
c78679e8 4689 goto nla_put_failure;
1ab1457c 4690 rtm->rtm_dst_len = 128;
1da177e4 4691 } else if (rtm->rtm_dst_len)
22d0bd82 4692 if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
c78679e8 4693 goto nla_put_failure;
1da177e4
LT
4694#ifdef CONFIG_IPV6_SUBTREES
4695 if (src) {
930345ea 4696 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4697 goto nla_put_failure;
1ab1457c 4698 rtm->rtm_src_len = 128;
c78679e8 4699 } else if (rtm->rtm_src_len &&
22d0bd82 4700 nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
c78679e8 4701 goto nla_put_failure;
1da177e4 4702#endif
7bc570c8
YH
4703 if (iif) {
4704#ifdef CONFIG_IPV6_MROUTE
22d0bd82 4705 if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
fd61c6ba
DA
4706 int err = ip6mr_get_route(net, skb, rtm, portid);
4707
4708 if (err == 0)
4709 return 0;
4710 if (err < 0)
4711 goto nla_put_failure;
7bc570c8
YH
4712 } else
4713#endif
c78679e8
DM
4714 if (nla_put_u32(skb, RTA_IIF, iif))
4715 goto nla_put_failure;
d4ead6b3 4716 } else if (dest) {
1da177e4 4717 struct in6_addr saddr_buf;
d4ead6b3 4718 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
930345ea 4719 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4720 goto nla_put_failure;
1da177e4 4721 }
2d7202bf 4722
93c2fb25 4723 if (rt->fib6_prefsrc.plen) {
c3968a85 4724 struct in6_addr saddr_buf;
93c2fb25 4725 saddr_buf = rt->fib6_prefsrc.addr;
930345ea 4726 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4727 goto nla_put_failure;
c3968a85
DW
4728 }
4729
d4ead6b3
DA
4730 pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4731 if (rtnetlink_put_metrics(skb, pmetrics) < 0)
2d7202bf
TG
4732 goto nla_put_failure;
4733
93c2fb25 4734 if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
c78679e8 4735 goto nla_put_failure;
8253947e 4736
beb1afac
DA
4737 /* For multipath routes, walk the siblings list and add
4738 * each as a nexthop within RTA_MULTIPATH.
4739 */
22d0bd82
XL
4740 if (rt6) {
4741 if (rt6_flags & RTF_GATEWAY &&
4742 nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
4743 goto nla_put_failure;
4744
4745 if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
4746 goto nla_put_failure;
4747 } else if (rt->fib6_nsiblings) {
8d1c802b 4748 struct fib6_info *sibling, *next_sibling;
beb1afac
DA
4749 struct nlattr *mp;
4750
4751 mp = nla_nest_start(skb, RTA_MULTIPATH);
4752 if (!mp)
4753 goto nla_put_failure;
4754
4755 if (rt6_add_nexthop(skb, rt) < 0)
4756 goto nla_put_failure;
4757
4758 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25 4759 &rt->fib6_siblings, fib6_siblings) {
beb1afac
DA
4760 if (rt6_add_nexthop(skb, sibling) < 0)
4761 goto nla_put_failure;
4762 }
4763
4764 nla_nest_end(skb, mp);
4765 } else {
5be083ce 4766 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
4767 goto nla_put_failure;
4768 }
4769
22d0bd82 4770 if (rt6_flags & RTF_EXPIRES) {
14895687
DA
4771 expires = dst ? dst->expires : rt->expires;
4772 expires -= jiffies;
4773 }
69cdf8f9 4774
d4ead6b3 4775 if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
e3703b3d 4776 goto nla_put_failure;
2d7202bf 4777
22d0bd82 4778 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
c78ba6d6
LR
4779 goto nla_put_failure;
4780
19e42e45 4781
053c095a
JB
4782 nlmsg_end(skb, nlh);
4783 return 0;
2d7202bf
TG
4784
4785nla_put_failure:
26932566
PM
4786 nlmsg_cancel(skb, nlh);
4787 return -EMSGSIZE;
1da177e4
LT
4788}
4789
13e38901
DA
4790static bool fib6_info_uses_dev(const struct fib6_info *f6i,
4791 const struct net_device *dev)
4792{
4793 if (f6i->fib6_nh.nh_dev == dev)
4794 return true;
4795
4796 if (f6i->fib6_nsiblings) {
4797 struct fib6_info *sibling, *next_sibling;
4798
4799 list_for_each_entry_safe(sibling, next_sibling,
4800 &f6i->fib6_siblings, fib6_siblings) {
4801 if (sibling->fib6_nh.nh_dev == dev)
4802 return true;
4803 }
4804 }
4805
4806 return false;
4807}
4808
8d1c802b 4809int rt6_dump_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4810{
4811 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
13e38901
DA
4812 struct fib_dump_filter *filter = &arg->filter;
4813 unsigned int flags = NLM_F_MULTI;
1f17e2f2
DA
4814 struct net *net = arg->net;
4815
421842ed 4816 if (rt == net->ipv6.fib6_null_entry)
1f17e2f2 4817 return 0;
1da177e4 4818
13e38901
DA
4819 if ((filter->flags & RTM_F_PREFIX) &&
4820 !(rt->fib6_flags & RTF_PREFIX_RT)) {
4821 /* success since this is not a prefix route */
4822 return 1;
4823 }
4824 if (filter->filter_set) {
4825 if ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
4826 (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
4827 (filter->protocol && rt->fib6_protocol != filter->protocol)) {
f8cfe2ce
DA
4828 return 1;
4829 }
13e38901 4830 flags |= NLM_F_DUMP_FILTERED;
f8cfe2ce 4831 }
1da177e4 4832
d4ead6b3
DA
4833 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4834 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
13e38901 4835 arg->cb->nlh->nlmsg_seq, flags);
1da177e4
LT
4836}
4837
c21ef3e3
DA
4838static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4839 struct netlink_ext_ack *extack)
1da177e4 4840{
3b1e0a65 4841 struct net *net = sock_net(in_skb->sk);
ab364a6f 4842 struct nlattr *tb[RTA_MAX+1];
18c3a61c 4843 int err, iif = 0, oif = 0;
a68886a6 4844 struct fib6_info *from;
18c3a61c 4845 struct dst_entry *dst;
ab364a6f 4846 struct rt6_info *rt;
1da177e4 4847 struct sk_buff *skb;
ab364a6f 4848 struct rtmsg *rtm;
744486d4 4849 struct flowi6 fl6 = {};
18c3a61c 4850 bool fibmatch;
1da177e4 4851
fceb6435 4852 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 4853 extack);
ab364a6f
TG
4854 if (err < 0)
4855 goto errout;
1da177e4 4856
ab364a6f 4857 err = -EINVAL;
38b7097b
HFS
4858 rtm = nlmsg_data(nlh);
4859 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4860 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4861
ab364a6f
TG
4862 if (tb[RTA_SRC]) {
4863 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4864 goto errout;
4865
4e3fd7a0 4866 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4867 }
4868
4869 if (tb[RTA_DST]) {
4870 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4871 goto errout;
4872
4e3fd7a0 4873 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4874 }
4875
4876 if (tb[RTA_IIF])
4877 iif = nla_get_u32(tb[RTA_IIF]);
4878
4879 if (tb[RTA_OIF])
72331bc0 4880 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4881
2e47b291
LC
4882 if (tb[RTA_MARK])
4883 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4884
622ec2c9
LC
4885 if (tb[RTA_UID])
4886 fl6.flowi6_uid = make_kuid(current_user_ns(),
4887 nla_get_u32(tb[RTA_UID]));
4888 else
4889 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4890
eacb9384
RP
4891 if (tb[RTA_SPORT])
4892 fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
4893
4894 if (tb[RTA_DPORT])
4895 fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
4896
4897 if (tb[RTA_IP_PROTO]) {
4898 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
b17fa794
HL
4899 &fl6.flowi6_proto, AF_INET6,
4900 extack);
eacb9384
RP
4901 if (err)
4902 goto errout;
4903 }
4904
1da177e4
LT
4905 if (iif) {
4906 struct net_device *dev;
72331bc0
SL
4907 int flags = 0;
4908
121622db
FW
4909 rcu_read_lock();
4910
4911 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4912 if (!dev) {
121622db 4913 rcu_read_unlock();
1da177e4 4914 err = -ENODEV;
ab364a6f 4915 goto errout;
1da177e4 4916 }
72331bc0
SL
4917
4918 fl6.flowi6_iif = iif;
4919
4920 if (!ipv6_addr_any(&fl6.saddr))
4921 flags |= RT6_LOOKUP_F_HAS_SADDR;
4922
b75cc8f9 4923 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
121622db
FW
4924
4925 rcu_read_unlock();
72331bc0
SL
4926 } else {
4927 fl6.flowi6_oif = oif;
4928
58acfd71 4929 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
4930 }
4931
18c3a61c
RP
4932
4933 rt = container_of(dst, struct rt6_info, dst);
4934 if (rt->dst.error) {
4935 err = rt->dst.error;
4936 ip6_rt_put(rt);
4937 goto errout;
1da177e4
LT
4938 }
4939
9d6acb3b
WC
4940 if (rt == net->ipv6.ip6_null_entry) {
4941 err = rt->dst.error;
4942 ip6_rt_put(rt);
4943 goto errout;
4944 }
4945
ab364a6f 4946 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4947 if (!skb) {
94e187c0 4948 ip6_rt_put(rt);
ab364a6f
TG
4949 err = -ENOBUFS;
4950 goto errout;
4951 }
1da177e4 4952
d8d1f30b 4953 skb_dst_set(skb, &rt->dst);
a68886a6
DA
4954
4955 rcu_read_lock();
4956 from = rcu_dereference(rt->from);
4957
18c3a61c 4958 if (fibmatch)
a68886a6 4959 err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
18c3a61c
RP
4960 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4961 nlh->nlmsg_seq, 0);
4962 else
a68886a6
DA
4963 err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
4964 &fl6.saddr, iif, RTM_NEWROUTE,
d4ead6b3
DA
4965 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
4966 0);
a68886a6
DA
4967 rcu_read_unlock();
4968
1da177e4 4969 if (err < 0) {
ab364a6f
TG
4970 kfree_skb(skb);
4971 goto errout;
1da177e4
LT
4972 }
4973
15e47304 4974 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 4975errout:
1da177e4 4976 return err;
1da177e4
LT
4977}
4978
8d1c802b 4979void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
37a1d361 4980 unsigned int nlm_flags)
1da177e4
LT
4981{
4982 struct sk_buff *skb;
5578689a 4983 struct net *net = info->nl_net;
528c4ceb
DL
4984 u32 seq;
4985 int err;
4986
4987 err = -ENOBUFS;
38308473 4988 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 4989
19e42e45 4990 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 4991 if (!skb)
21713ebc
TG
4992 goto errout;
4993
d4ead6b3
DA
4994 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
4995 event, info->portid, seq, nlm_flags);
26932566
PM
4996 if (err < 0) {
4997 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4998 WARN_ON(err == -EMSGSIZE);
4999 kfree_skb(skb);
5000 goto errout;
5001 }
15e47304 5002 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
5003 info->nlh, gfp_any());
5004 return;
21713ebc
TG
5005errout:
5006 if (err < 0)
5578689a 5007 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
5008}
5009
8ed67789 5010static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 5011 unsigned long event, void *ptr)
8ed67789 5012{
351638e7 5013 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 5014 struct net *net = dev_net(dev);
8ed67789 5015
242d3a49
WC
5016 if (!(dev->flags & IFF_LOOPBACK))
5017 return NOTIFY_OK;
5018
5019 if (event == NETDEV_REGISTER) {
421842ed 5020 net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
d8d1f30b 5021 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
5022 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
5023#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 5024 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 5025 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 5026 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 5027 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 5028#endif
76da0704
WC
5029 } else if (event == NETDEV_UNREGISTER &&
5030 dev->reg_state != NETREG_UNREGISTERED) {
5031 /* NETDEV_UNREGISTER could be fired for multiple times by
5032 * netdev_wait_allrefs(). Make sure we only call this once.
5033 */
12d94a80 5034 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 5035#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
5036 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
5037 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
5038#endif
5039 }
5040
5041 return NOTIFY_OK;
5042}
5043
1da177e4
LT
5044/*
5045 * /proc
5046 */
5047
5048#ifdef CONFIG_PROC_FS
1da177e4
LT
5049static int rt6_stats_seq_show(struct seq_file *seq, void *v)
5050{
69ddb805 5051 struct net *net = (struct net *)seq->private;
1da177e4 5052 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
5053 net->ipv6.rt6_stats->fib_nodes,
5054 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 5055 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
5056 net->ipv6.rt6_stats->fib_rt_entries,
5057 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 5058 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 5059 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
5060
5061 return 0;
5062}
1da177e4
LT
5063#endif /* CONFIG_PROC_FS */
5064
5065#ifdef CONFIG_SYSCTL
5066
1da177e4 5067static
fe2c6338 5068int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
5069 void __user *buffer, size_t *lenp, loff_t *ppos)
5070{
c486da34
LAG
5071 struct net *net;
5072 int delay;
5073 if (!write)
1da177e4 5074 return -EINVAL;
c486da34
LAG
5075
5076 net = (struct net *)ctl->extra1;
5077 delay = net->ipv6.sysctl.flush_delay;
5078 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 5079 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 5080 return 0;
1da177e4
LT
5081}
5082
7c6bb7d2
DA
5083static int zero;
5084static int one = 1;
5085
ed792e28 5086static struct ctl_table ipv6_route_table_template[] = {
1ab1457c 5087 {
1da177e4 5088 .procname = "flush",
4990509f 5089 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 5090 .maxlen = sizeof(int),
89c8b3a1 5091 .mode = 0200,
6d9f239a 5092 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
5093 },
5094 {
1da177e4 5095 .procname = "gc_thresh",
9a7ec3a9 5096 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
5097 .maxlen = sizeof(int),
5098 .mode = 0644,
6d9f239a 5099 .proc_handler = proc_dointvec,
1da177e4
LT
5100 },
5101 {
1da177e4 5102 .procname = "max_size",
4990509f 5103 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
5104 .maxlen = sizeof(int),
5105 .mode = 0644,
6d9f239a 5106 .proc_handler = proc_dointvec,
1da177e4
LT
5107 },
5108 {
1da177e4 5109 .procname = "gc_min_interval",
4990509f 5110 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5111 .maxlen = sizeof(int),
5112 .mode = 0644,
6d9f239a 5113 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5114 },
5115 {
1da177e4 5116 .procname = "gc_timeout",
4990509f 5117 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
5118 .maxlen = sizeof(int),
5119 .mode = 0644,
6d9f239a 5120 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5121 },
5122 {
1da177e4 5123 .procname = "gc_interval",
4990509f 5124 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
5125 .maxlen = sizeof(int),
5126 .mode = 0644,
6d9f239a 5127 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5128 },
5129 {
1da177e4 5130 .procname = "gc_elasticity",
4990509f 5131 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
5132 .maxlen = sizeof(int),
5133 .mode = 0644,
f3d3f616 5134 .proc_handler = proc_dointvec,
1da177e4
LT
5135 },
5136 {
1da177e4 5137 .procname = "mtu_expires",
4990509f 5138 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
5139 .maxlen = sizeof(int),
5140 .mode = 0644,
6d9f239a 5141 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5142 },
5143 {
1da177e4 5144 .procname = "min_adv_mss",
4990509f 5145 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
5146 .maxlen = sizeof(int),
5147 .mode = 0644,
f3d3f616 5148 .proc_handler = proc_dointvec,
1da177e4
LT
5149 },
5150 {
1da177e4 5151 .procname = "gc_min_interval_ms",
4990509f 5152 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5153 .maxlen = sizeof(int),
5154 .mode = 0644,
6d9f239a 5155 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 5156 },
7c6bb7d2
DA
5157 {
5158 .procname = "skip_notify_on_dev_down",
5159 .data = &init_net.ipv6.sysctl.skip_notify_on_dev_down,
5160 .maxlen = sizeof(int),
5161 .mode = 0644,
5162 .proc_handler = proc_dointvec,
5163 .extra1 = &zero,
5164 .extra2 = &one,
5165 },
f8572d8f 5166 { }
1da177e4
LT
5167};
5168
2c8c1e72 5169struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
5170{
5171 struct ctl_table *table;
5172
5173 table = kmemdup(ipv6_route_table_template,
5174 sizeof(ipv6_route_table_template),
5175 GFP_KERNEL);
5ee09105
YH
5176
5177 if (table) {
5178 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 5179 table[0].extra1 = net;
86393e52 5180 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
5181 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5182 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5183 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5184 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5185 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5186 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5187 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 5188 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
7c6bb7d2 5189 table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
464dc801
EB
5190
5191 /* Don't export sysctls to unprivileged users */
5192 if (net->user_ns != &init_user_ns)
5193 table[0].procname = NULL;
5ee09105
YH
5194 }
5195
760f2d01
DL
5196 return table;
5197}
1da177e4
LT
5198#endif
5199
2c8c1e72 5200static int __net_init ip6_route_net_init(struct net *net)
cdb18761 5201{
633d424b 5202 int ret = -ENOMEM;
8ed67789 5203
86393e52
AD
5204 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5205 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 5206
fc66f95c
ED
5207 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5208 goto out_ip6_dst_ops;
5209
421842ed
DA
5210 net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5211 sizeof(*net->ipv6.fib6_null_entry),
5212 GFP_KERNEL);
5213 if (!net->ipv6.fib6_null_entry)
5214 goto out_ip6_dst_entries;
5215
8ed67789
DL
5216 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5217 sizeof(*net->ipv6.ip6_null_entry),
5218 GFP_KERNEL);
5219 if (!net->ipv6.ip6_null_entry)
421842ed 5220 goto out_fib6_null_entry;
d8d1f30b 5221 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5222 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5223 ip6_template_metrics, true);
8ed67789
DL
5224
5225#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 5226 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
5227 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5228 sizeof(*net->ipv6.ip6_prohibit_entry),
5229 GFP_KERNEL);
68fffc67
PZ
5230 if (!net->ipv6.ip6_prohibit_entry)
5231 goto out_ip6_null_entry;
d8d1f30b 5232 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5233 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5234 ip6_template_metrics, true);
8ed67789
DL
5235
5236 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5237 sizeof(*net->ipv6.ip6_blk_hole_entry),
5238 GFP_KERNEL);
68fffc67
PZ
5239 if (!net->ipv6.ip6_blk_hole_entry)
5240 goto out_ip6_prohibit_entry;
d8d1f30b 5241 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5242 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5243 ip6_template_metrics, true);
8ed67789
DL
5244#endif
5245
b339a47c
PZ
5246 net->ipv6.sysctl.flush_delay = 0;
5247 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5248 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5249 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5250 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5251 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5252 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5253 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
7c6bb7d2 5254 net->ipv6.sysctl.skip_notify_on_dev_down = 0;
b339a47c 5255
6891a346
BT
5256 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5257
8ed67789
DL
5258 ret = 0;
5259out:
5260 return ret;
f2fc6a54 5261
68fffc67
PZ
5262#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5263out_ip6_prohibit_entry:
5264 kfree(net->ipv6.ip6_prohibit_entry);
5265out_ip6_null_entry:
5266 kfree(net->ipv6.ip6_null_entry);
5267#endif
421842ed
DA
5268out_fib6_null_entry:
5269 kfree(net->ipv6.fib6_null_entry);
fc66f95c
ED
5270out_ip6_dst_entries:
5271 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 5272out_ip6_dst_ops:
f2fc6a54 5273 goto out;
cdb18761
DL
5274}
5275
2c8c1e72 5276static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 5277{
421842ed 5278 kfree(net->ipv6.fib6_null_entry);
8ed67789
DL
5279 kfree(net->ipv6.ip6_null_entry);
5280#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5281 kfree(net->ipv6.ip6_prohibit_entry);
5282 kfree(net->ipv6.ip6_blk_hole_entry);
5283#endif
41bb78b4 5284 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
5285}
5286
d189634e
TG
5287static int __net_init ip6_route_net_init_late(struct net *net)
5288{
5289#ifdef CONFIG_PROC_FS
c3506372
CH
5290 proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
5291 sizeof(struct ipv6_route_iter));
3617d949
CH
5292 proc_create_net_single("rt6_stats", 0444, net->proc_net,
5293 rt6_stats_seq_show, NULL);
d189634e
TG
5294#endif
5295 return 0;
5296}
5297
5298static void __net_exit ip6_route_net_exit_late(struct net *net)
5299{
5300#ifdef CONFIG_PROC_FS
ece31ffd
G
5301 remove_proc_entry("ipv6_route", net->proc_net);
5302 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
5303#endif
5304}
5305
cdb18761
DL
5306static struct pernet_operations ip6_route_net_ops = {
5307 .init = ip6_route_net_init,
5308 .exit = ip6_route_net_exit,
5309};
5310
c3426b47
DM
5311static int __net_init ipv6_inetpeer_init(struct net *net)
5312{
5313 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5314
5315 if (!bp)
5316 return -ENOMEM;
5317 inet_peer_base_init(bp);
5318 net->ipv6.peers = bp;
5319 return 0;
5320}
5321
5322static void __net_exit ipv6_inetpeer_exit(struct net *net)
5323{
5324 struct inet_peer_base *bp = net->ipv6.peers;
5325
5326 net->ipv6.peers = NULL;
56a6b248 5327 inetpeer_invalidate_tree(bp);
c3426b47
DM
5328 kfree(bp);
5329}
5330
2b823f72 5331static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
5332 .init = ipv6_inetpeer_init,
5333 .exit = ipv6_inetpeer_exit,
5334};
5335
d189634e
TG
5336static struct pernet_operations ip6_route_net_late_ops = {
5337 .init = ip6_route_net_init_late,
5338 .exit = ip6_route_net_exit_late,
5339};
5340
8ed67789
DL
5341static struct notifier_block ip6_route_dev_notifier = {
5342 .notifier_call = ip6_route_dev_notify,
242d3a49 5343 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5344};
5345
2f460933
WC
5346void __init ip6_route_init_special_entries(void)
5347{
5348 /* Registering of the loopback is done before this portion of code,
5349 * the loopback reference in rt6_info will not be taken, do it
5350 * manually for init_net */
421842ed 5351 init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
2f460933
WC
5352 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5353 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5354 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5355 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5356 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5357 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5358 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5359 #endif
5360}
5361
433d49c3 5362int __init ip6_route_init(void)
1da177e4 5363{
433d49c3 5364 int ret;
8d0b94af 5365 int cpu;
433d49c3 5366
9a7ec3a9
DL
5367 ret = -ENOMEM;
5368 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5369 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5370 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5371 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5372 goto out;
14e50e57 5373
fc66f95c 5374 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5375 if (ret)
bdb3289f 5376 goto out_kmem_cache;
bdb3289f 5377
c3426b47
DM
5378 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5379 if (ret)
e8803b6c 5380 goto out_dst_entries;
2a0c451a 5381
7e52b33b
DM
5382 ret = register_pernet_subsys(&ip6_route_net_ops);
5383 if (ret)
5384 goto out_register_inetpeer;
c3426b47 5385
5dc121e9
AE
5386 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5387
e8803b6c 5388 ret = fib6_init();
433d49c3 5389 if (ret)
8ed67789 5390 goto out_register_subsys;
433d49c3 5391
433d49c3
DL
5392 ret = xfrm6_init();
5393 if (ret)
e8803b6c 5394 goto out_fib6_init;
c35b7e72 5395
433d49c3
DL
5396 ret = fib6_rules_init();
5397 if (ret)
5398 goto xfrm6_init;
7e5449c2 5399
d189634e
TG
5400 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5401 if (ret)
5402 goto fib6_rules_init;
5403
16feebcf
FW
5404 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5405 inet6_rtm_newroute, NULL, 0);
5406 if (ret < 0)
5407 goto out_register_late_subsys;
5408
5409 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5410 inet6_rtm_delroute, NULL, 0);
5411 if (ret < 0)
5412 goto out_register_late_subsys;
5413
5414 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5415 inet6_rtm_getroute, NULL,
5416 RTNL_FLAG_DOIT_UNLOCKED);
5417 if (ret < 0)
d189634e 5418 goto out_register_late_subsys;
c127ea2c 5419
8ed67789 5420 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5421 if (ret)
d189634e 5422 goto out_register_late_subsys;
8ed67789 5423
8d0b94af
MKL
5424 for_each_possible_cpu(cpu) {
5425 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5426
5427 INIT_LIST_HEAD(&ul->head);
5428 spin_lock_init(&ul->lock);
5429 }
5430
433d49c3
DL
5431out:
5432 return ret;
5433
d189634e 5434out_register_late_subsys:
16feebcf 5435 rtnl_unregister_all(PF_INET6);
d189634e 5436 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5437fib6_rules_init:
433d49c3
DL
5438 fib6_rules_cleanup();
5439xfrm6_init:
433d49c3 5440 xfrm6_fini();
2a0c451a
TG
5441out_fib6_init:
5442 fib6_gc_cleanup();
8ed67789
DL
5443out_register_subsys:
5444 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5445out_register_inetpeer:
5446 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5447out_dst_entries:
5448 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5449out_kmem_cache:
f2fc6a54 5450 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5451 goto out;
1da177e4
LT
5452}
5453
5454void ip6_route_cleanup(void)
5455{
8ed67789 5456 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5457 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5458 fib6_rules_cleanup();
1da177e4 5459 xfrm6_fini();
1da177e4 5460 fib6_gc_cleanup();
c3426b47 5461 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5462 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5463 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5464 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5465}