]> git.ipfire.org Git - thirdparty/kernel/stable.git/blame - net/ipv6/route.c
ipv6: route: enforce RCU protection in ip6_route_check_nh_onlink()
[thirdparty/kernel/stable.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
eacb9384 66#include <net/ip.h>
7c0f6ba6 67#include <linux/uaccess.h>
1da177e4
LT
68
69#ifdef CONFIG_SYSCTL
70#include <linux/sysctl.h>
71#endif
72
30d444d3
DA
73static int ip6_rt_type_to_error(u8 fib6_type);
74
75#define CREATE_TRACE_POINTS
76#include <trace/events/fib6.h>
77EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
78#undef CREATE_TRACE_POINTS
79
/*
 * Result codes for the neighbour reachability check used when scoring
 * a route.  All failure codes are negative so callers can test "< 0".
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,	/* route is skipped by find_match() */
	RT6_NUD_FAIL_PROBE	= -2,	/* neighbour is in NUD_FAILED state */
	RT6_NUD_FAIL_DO_RR	= -1,	/* find_match() requests round-robin */
	RT6_NUD_SUCCEED		= 1
};
86
1da177e4 87static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 88static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 89static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
569d3645 94static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
95
96static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 97static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 98static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 99static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 100static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
101static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
102 struct sk_buff *skb, u32 mtu);
103static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
104 struct sk_buff *skb);
8d1c802b
DA
105static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
106static size_t rt6_nlmsg_size(struct fib6_info *rt);
d4ead6b3 107static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 108 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 109 struct in6_addr *dest, struct in6_addr *src,
16a16cd3
DA
110 int iif, int type, u32 portid, u32 seq,
111 unsigned int flags);
8d1c802b 112static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
113 struct in6_addr *daddr,
114 struct in6_addr *saddr);
1da177e4 115
70ceb4f5 116#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 117static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 118 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
119 const struct in6_addr *gwaddr,
120 struct net_device *dev,
95c96174 121 unsigned int pref);
8d1c802b 122static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 123 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
124 const struct in6_addr *gwaddr,
125 struct net_device *dev);
70ceb4f5
YH
126#endif
127
8d0b94af
MKL
128struct uncached_list {
129 spinlock_t lock;
130 struct list_head head;
131};
132
133static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
134
510c321b 135void rt6_uncached_list_add(struct rt6_info *rt)
8d0b94af
MKL
136{
137 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
138
8d0b94af
MKL
139 rt->rt6i_uncached_list = ul;
140
141 spin_lock_bh(&ul->lock);
142 list_add_tail(&rt->rt6i_uncached, &ul->head);
143 spin_unlock_bh(&ul->lock);
144}
145
510c321b 146void rt6_uncached_list_del(struct rt6_info *rt)
8d0b94af
MKL
147{
148 if (!list_empty(&rt->rt6i_uncached)) {
149 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 150 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
151
152 spin_lock_bh(&ul->lock);
153 list_del(&rt->rt6i_uncached);
81eb8447 154 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
155 spin_unlock_bh(&ul->lock);
156 }
157}
158
159static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
160{
161 struct net_device *loopback_dev = net->loopback_dev;
162 int cpu;
163
e332bc67
EB
164 if (dev == loopback_dev)
165 return;
166
8d0b94af
MKL
167 for_each_possible_cpu(cpu) {
168 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
169 struct rt6_info *rt;
170
171 spin_lock_bh(&ul->lock);
172 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
173 struct inet6_dev *rt_idev = rt->rt6i_idev;
174 struct net_device *rt_dev = rt->dst.dev;
175
e332bc67 176 if (rt_idev->dev == dev) {
8d0b94af
MKL
177 rt->rt6i_idev = in6_dev_get(loopback_dev);
178 in6_dev_put(rt_idev);
179 }
180
e332bc67 181 if (rt_dev == dev) {
8d0b94af
MKL
182 rt->dst.dev = loopback_dev;
183 dev_hold(rt->dst.dev);
184 dev_put(rt_dev);
185 }
186 }
187 spin_unlock_bh(&ul->lock);
188 }
189}
190
f8a1b43b 191static inline const void *choose_neigh_daddr(const struct in6_addr *p,
f894cbf8
DM
192 struct sk_buff *skb,
193 const void *daddr)
39232973 194{
a7563f34 195 if (!ipv6_addr_any(p))
39232973 196 return (const void *) p;
f894cbf8
DM
197 else if (skb)
198 return &ipv6_hdr(skb)->daddr;
39232973
DM
199 return daddr;
200}
201
f8a1b43b
DA
202struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
203 struct net_device *dev,
204 struct sk_buff *skb,
205 const void *daddr)
d3aaeb38 206{
39232973
DM
207 struct neighbour *n;
208
f8a1b43b
DA
209 daddr = choose_neigh_daddr(gw, skb, daddr);
210 n = __ipv6_neigh_lookup(dev, daddr);
f83c7790
DM
211 if (n)
212 return n;
f8a1b43b
DA
213 return neigh_create(&nd_tbl, daddr, dev);
214}
215
216static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
217 struct sk_buff *skb,
218 const void *daddr)
219{
220 const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
221
222 return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
f83c7790
DM
223}
224
63fca65d
JA
225static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
226{
227 struct net_device *dev = dst->dev;
228 struct rt6_info *rt = (struct rt6_info *)dst;
229
f8a1b43b 230 daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
63fca65d
JA
231 if (!daddr)
232 return;
233 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
234 return;
235 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
236 return;
237 __ipv6_confirm_neigh(dev, daddr);
238}
239
9a7ec3a9 240static struct dst_ops ip6_dst_ops_template = {
1da177e4 241 .family = AF_INET6,
1da177e4
LT
242 .gc = ip6_dst_gc,
243 .gc_thresh = 1024,
244 .check = ip6_dst_check,
0dbaee3b 245 .default_advmss = ip6_default_advmss,
ebb762f2 246 .mtu = ip6_mtu,
d4ead6b3 247 .cow_metrics = dst_cow_metrics_generic,
1da177e4
LT
248 .destroy = ip6_dst_destroy,
249 .ifdown = ip6_dst_ifdown,
250 .negative_advice = ip6_negative_advice,
251 .link_failure = ip6_link_failure,
252 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 253 .redirect = rt6_do_redirect,
9f8955cc 254 .local_out = __ip6_local_out,
f8a1b43b 255 .neigh_lookup = ip6_dst_neigh_lookup,
63fca65d 256 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
257};
258
ebb762f2 259static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 260{
618f9bc7
SK
261 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
262
263 return mtu ? : dst->dev->mtu;
ec831ea7
RD
264}
265
6700c270
DM
266static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
267 struct sk_buff *skb, u32 mtu)
14e50e57
DM
268{
269}
270
6700c270
DM
/* dst_ops->redirect hook for blackhole entries: intentionally empty. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
275
14e50e57
DM
276static struct dst_ops ip6_dst_blackhole_ops = {
277 .family = AF_INET6,
14e50e57
DM
278 .destroy = ip6_dst_destroy,
279 .check = ip6_dst_check,
ebb762f2 280 .mtu = ip6_blackhole_mtu,
214f45c9 281 .default_advmss = ip6_default_advmss,
14e50e57 282 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 283 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 284 .cow_metrics = dst_cow_metrics_generic,
f8a1b43b 285 .neigh_lookup = ip6_dst_neigh_lookup,
14e50e57
DM
286};
287
62fa8a84 288static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 289 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
290};
291
8d1c802b 292static const struct fib6_info fib6_null_entry_template = {
93c2fb25
DA
293 .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP),
294 .fib6_protocol = RTPROT_KERNEL,
295 .fib6_metric = ~(u32)0,
296 .fib6_ref = ATOMIC_INIT(1),
421842ed
DA
297 .fib6_type = RTN_UNREACHABLE,
298 .fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
299};
300
fb0af4c7 301static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
302 .dst = {
303 .__refcnt = ATOMIC_INIT(1),
304 .__use = 1,
2c20cbd7 305 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 306 .error = -ENETUNREACH,
d8d1f30b
CG
307 .input = ip6_pkt_discard,
308 .output = ip6_pkt_discard_out,
1da177e4
LT
309 },
310 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
1da177e4
LT
311};
312
101367c2
TG
313#ifdef CONFIG_IPV6_MULTIPLE_TABLES
314
fb0af4c7 315static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
316 .dst = {
317 .__refcnt = ATOMIC_INIT(1),
318 .__use = 1,
2c20cbd7 319 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 320 .error = -EACCES,
d8d1f30b
CG
321 .input = ip6_pkt_prohibit,
322 .output = ip6_pkt_prohibit_out,
101367c2
TG
323 },
324 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
325};
326
fb0af4c7 327static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
328 .dst = {
329 .__refcnt = ATOMIC_INIT(1),
330 .__use = 1,
2c20cbd7 331 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 332 .error = -EINVAL,
d8d1f30b 333 .input = dst_discard,
ede2059d 334 .output = dst_discard_out,
101367c2
TG
335 },
336 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
337};
338
339#endif
340
ebfa45f0
MKL
341static void rt6_info_init(struct rt6_info *rt)
342{
343 struct dst_entry *dst = &rt->dst;
344
345 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
ebfa45f0
MKL
346 INIT_LIST_HEAD(&rt->rt6i_uncached);
347}
348
1da177e4 349/* allocate dst with ip6_dst_ops */
93531c67
DA
350struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
351 int flags)
1da177e4 352{
97bab73f 353 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 354 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 355
81eb8447 356 if (rt) {
ebfa45f0 357 rt6_info_init(rt);
81eb8447
WW
358 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
359 }
8104891b 360
cf911662 361 return rt;
1da177e4 362}
9ab179d8 363EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 364
1da177e4
LT
365static void ip6_dst_destroy(struct dst_entry *dst)
366{
367 struct rt6_info *rt = (struct rt6_info *)dst;
a68886a6 368 struct fib6_info *from;
8d0b94af 369 struct inet6_dev *idev;
1da177e4 370
1620a336 371 ip_dst_metrics_put(dst);
8d0b94af
MKL
372 rt6_uncached_list_del(rt);
373
374 idev = rt->rt6i_idev;
38308473 375 if (idev) {
1da177e4
LT
376 rt->rt6i_idev = NULL;
377 in6_dev_put(idev);
1ab1457c 378 }
1716a961 379
a68886a6
DA
380 rcu_read_lock();
381 from = rcu_dereference(rt->from);
382 rcu_assign_pointer(rt->from, NULL);
93531c67 383 fib6_info_release(from);
a68886a6 384 rcu_read_unlock();
b3419363
DM
385}
386
1da177e4
LT
387static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
388 int how)
389{
390 struct rt6_info *rt = (struct rt6_info *)dst;
391 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 392 struct net_device *loopback_dev =
c346dca1 393 dev_net(dev)->loopback_dev;
1da177e4 394
e5645f51
WW
395 if (idev && idev->dev != loopback_dev) {
396 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
397 if (loopback_idev) {
398 rt->rt6i_idev = loopback_idev;
399 in6_dev_put(idev);
97cac082 400 }
1da177e4
LT
401 }
402}
403
5973fb1e
MKL
404static bool __rt6_check_expired(const struct rt6_info *rt)
405{
406 if (rt->rt6i_flags & RTF_EXPIRES)
407 return time_after(jiffies, rt->dst.expires);
408 else
409 return false;
410}
411
a50feda5 412static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 413{
a68886a6
DA
414 struct fib6_info *from;
415
416 from = rcu_dereference(rt->from);
417
1716a961
G
418 if (rt->rt6i_flags & RTF_EXPIRES) {
419 if (time_after(jiffies, rt->dst.expires))
a50feda5 420 return true;
a68886a6 421 } else if (from) {
1e2ea8ad 422 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
a68886a6 423 fib6_check_expired(from);
1716a961 424 }
a50feda5 425 return false;
1da177e4
LT
426}
427
3b290a31
DA
428struct fib6_info *fib6_multipath_select(const struct net *net,
429 struct fib6_info *match,
430 struct flowi6 *fl6, int oif,
431 const struct sk_buff *skb,
432 int strict)
51ebd318 433{
8d1c802b 434 struct fib6_info *sibling, *next_sibling;
51ebd318 435
b673d6cc
JS
436 /* We might have already computed the hash for ICMPv6 errors. In such
437 * case it will always be non-zero. Otherwise now is the time to do it.
438 */
439 if (!fl6->mp_hash)
b4bac172 440 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
b673d6cc 441
5e670d84 442 if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
3d709f69
IS
443 return match;
444
93c2fb25
DA
445 list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
446 fib6_siblings) {
5e670d84
DA
447 int nh_upper_bound;
448
449 nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
450 if (fl6->mp_hash > nh_upper_bound)
3d709f69
IS
451 continue;
452 if (rt6_score_route(sibling, oif, strict) < 0)
453 break;
454 match = sibling;
455 break;
456 }
457
51ebd318
ND
458 return match;
459}
460
1da177e4 461/*
66f5d6ce 462 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
463 */
464
8d1c802b
DA
465static inline struct fib6_info *rt6_device_match(struct net *net,
466 struct fib6_info *rt,
b71d1d42 467 const struct in6_addr *saddr,
1da177e4 468 int oif,
d420895e 469 int flags)
1da177e4 470{
8d1c802b 471 struct fib6_info *sprt;
1da177e4 472
5e670d84
DA
473 if (!oif && ipv6_addr_any(saddr) &&
474 !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
8067bb8c 475 return rt;
dd3abc4e 476
8fb11a9a 477 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
5e670d84 478 const struct net_device *dev = sprt->fib6_nh.nh_dev;
dd3abc4e 479
5e670d84 480 if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
481 continue;
482
dd3abc4e 483 if (oif) {
1da177e4
LT
484 if (dev->ifindex == oif)
485 return sprt;
dd3abc4e
YH
486 } else {
487 if (ipv6_chk_addr(net, saddr, dev,
488 flags & RT6_LOOKUP_F_IFACE))
489 return sprt;
1da177e4 490 }
dd3abc4e 491 }
1da177e4 492
eea68cd3
DA
493 if (oif && flags & RT6_LOOKUP_F_IFACE)
494 return net->ipv6.fib6_null_entry;
8067bb8c 495
421842ed 496 return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
1da177e4
LT
497}
498
27097255 499#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
500struct __rt6_probe_work {
501 struct work_struct work;
502 struct in6_addr target;
503 struct net_device *dev;
504};
505
506static void rt6_probe_deferred(struct work_struct *w)
507{
508 struct in6_addr mcaddr;
509 struct __rt6_probe_work *work =
510 container_of(w, struct __rt6_probe_work, work);
511
512 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 513 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 514 dev_put(work->dev);
662f5533 515 kfree(work);
c2f17e82
HFS
516}
517
8d1c802b 518static void rt6_probe(struct fib6_info *rt)
27097255 519{
f547fac6 520 struct __rt6_probe_work *work = NULL;
5e670d84 521 const struct in6_addr *nh_gw;
f2c31e32 522 struct neighbour *neigh;
5e670d84 523 struct net_device *dev;
f547fac6 524 struct inet6_dev *idev;
5e670d84 525
27097255
YH
526 /*
527 * Okay, this does not seem to be appropriate
528 * for now, however, we need to check if it
529 * is really so; aka Router Reachability Probing.
530 *
531 * Router Reachability Probe MUST be rate-limited
532 * to no more than one per minute.
533 */
93c2fb25 534 if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
7ff74a59 535 return;
5e670d84
DA
536
537 nh_gw = &rt->fib6_nh.nh_gw;
538 dev = rt->fib6_nh.nh_dev;
2152caea 539 rcu_read_lock_bh();
f547fac6 540 idev = __in6_dev_get(dev);
5e670d84 541 neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
2152caea 542 if (neigh) {
8d6c31bf
MKL
543 if (neigh->nud_state & NUD_VALID)
544 goto out;
545
2152caea 546 write_lock(&neigh->lock);
990edb42
MKL
547 if (!(neigh->nud_state & NUD_VALID) &&
548 time_after(jiffies,
dcd1f572 549 neigh->updated + idev->cnf.rtr_probe_interval)) {
990edb42
MKL
550 work = kmalloc(sizeof(*work), GFP_ATOMIC);
551 if (work)
552 __neigh_set_probe_once(neigh);
c2f17e82 553 }
2152caea 554 write_unlock(&neigh->lock);
f547fac6
SD
555 } else if (time_after(jiffies, rt->last_probe +
556 idev->cnf.rtr_probe_interval)) {
990edb42 557 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 558 }
990edb42
MKL
559
560 if (work) {
f547fac6 561 rt->last_probe = jiffies;
990edb42 562 INIT_WORK(&work->work, rt6_probe_deferred);
5e670d84
DA
563 work->target = *nh_gw;
564 dev_hold(dev);
565 work->dev = dev;
990edb42
MKL
566 schedule_work(&work->work);
567 }
568
8d6c31bf 569out:
2152caea 570 rcu_read_unlock_bh();
27097255
YH
571}
572#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct fib6_info *rt)
{
	/* nothing to do */
}
576#endif
577
1da177e4 578/*
554cfb7e 579 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 580 */
8d1c802b 581static inline int rt6_check_dev(struct fib6_info *rt, int oif)
554cfb7e 582{
5e670d84
DA
583 const struct net_device *dev = rt->fib6_nh.nh_dev;
584
161980f4 585 if (!oif || dev->ifindex == oif)
554cfb7e 586 return 2;
161980f4 587 return 0;
554cfb7e 588}
1da177e4 589
8d1c802b 590static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
1da177e4 591{
afc154e9 592 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
5e670d84 593 struct neighbour *neigh;
f2c31e32 594
93c2fb25
DA
595 if (rt->fib6_flags & RTF_NONEXTHOP ||
596 !(rt->fib6_flags & RTF_GATEWAY))
afc154e9 597 return RT6_NUD_SUCCEED;
145a3621
YH
598
599 rcu_read_lock_bh();
5e670d84
DA
600 neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
601 &rt->fib6_nh.nh_gw);
145a3621
YH
602 if (neigh) {
603 read_lock(&neigh->lock);
554cfb7e 604 if (neigh->nud_state & NUD_VALID)
afc154e9 605 ret = RT6_NUD_SUCCEED;
398bcbeb 606#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 607 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 608 ret = RT6_NUD_SUCCEED;
7e980569
JB
609 else
610 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 611#endif
145a3621 612 read_unlock(&neigh->lock);
afc154e9
HFS
613 } else {
614 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 615 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 616 }
145a3621
YH
617 rcu_read_unlock_bh();
618
a5a81f0b 619 return ret;
1da177e4
LT
620}
621
8d1c802b 622static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
1da177e4 623{
a5a81f0b 624 int m;
1ab1457c 625
4d0c5911 626 m = rt6_check_dev(rt, oif);
77d16f45 627 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 628 return RT6_NUD_FAIL_HARD;
ebacaaa0 629#ifdef CONFIG_IPV6_ROUTER_PREF
93c2fb25 630 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
ebacaaa0 631#endif
afc154e9
HFS
632 if (strict & RT6_LOOKUP_F_REACHABLE) {
633 int n = rt6_check_neigh(rt);
634 if (n < 0)
635 return n;
636 }
554cfb7e
YH
637 return m;
638}
639
dcd1f572
DA
640/* called with rc_read_lock held */
641static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i)
642{
643 const struct net_device *dev = fib6_info_nh_dev(f6i);
644 bool rc = false;
645
646 if (dev) {
647 const struct inet6_dev *idev = __in6_dev_get(dev);
648
649 rc = !!idev->cnf.ignore_routes_with_linkdown;
650 }
651
652 return rc;
653}
654
8d1c802b
DA
655static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
656 int *mpri, struct fib6_info *match,
afc154e9 657 bool *do_rr)
554cfb7e 658{
f11e6659 659 int m;
afc154e9 660 bool match_do_rr = false;
35103d11 661
5e670d84 662 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
663 goto out;
664
dcd1f572 665 if (fib6_ignore_linkdown(rt) &&
5e670d84 666 rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 667 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 668 goto out;
f11e6659 669
14895687 670 if (fib6_check_expired(rt))
f11e6659
DM
671 goto out;
672
673 m = rt6_score_route(rt, oif, strict);
7e980569 674 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
675 match_do_rr = true;
676 m = 0; /* lowest valid score */
7e980569 677 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 678 goto out;
afc154e9
HFS
679 }
680
681 if (strict & RT6_LOOKUP_F_REACHABLE)
682 rt6_probe(rt);
f11e6659 683
7e980569 684 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 685 if (m > *mpri) {
afc154e9 686 *do_rr = match_do_rr;
f11e6659
DM
687 *mpri = m;
688 match = rt;
f11e6659 689 }
f11e6659
DM
690out:
691 return match;
692}
693
8d1c802b
DA
694static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
695 struct fib6_info *leaf,
696 struct fib6_info *rr_head,
afc154e9
HFS
697 u32 metric, int oif, int strict,
698 bool *do_rr)
f11e6659 699{
8d1c802b 700 struct fib6_info *rt, *match, *cont;
554cfb7e 701 int mpri = -1;
1da177e4 702
f11e6659 703 match = NULL;
9fbdcfaf 704 cont = NULL;
8fb11a9a 705 for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
93c2fb25 706 if (rt->fib6_metric != metric) {
9fbdcfaf
SK
707 cont = rt;
708 break;
709 }
710
711 match = find_match(rt, oif, strict, &mpri, match, do_rr);
712 }
713
66f5d6ce 714 for (rt = leaf; rt && rt != rr_head;
8fb11a9a 715 rt = rcu_dereference(rt->fib6_next)) {
93c2fb25 716 if (rt->fib6_metric != metric) {
9fbdcfaf
SK
717 cont = rt;
718 break;
719 }
720
afc154e9 721 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
722 }
723
724 if (match || !cont)
725 return match;
726
8fb11a9a 727 for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
afc154e9 728 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 729
f11e6659
DM
730 return match;
731}
1da177e4 732
8d1c802b 733static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
8d1040e8 734 int oif, int strict)
f11e6659 735{
8d1c802b
DA
736 struct fib6_info *leaf = rcu_dereference(fn->leaf);
737 struct fib6_info *match, *rt0;
afc154e9 738 bool do_rr = false;
17ecf590 739 int key_plen;
1da177e4 740
421842ed
DA
741 if (!leaf || leaf == net->ipv6.fib6_null_entry)
742 return net->ipv6.fib6_null_entry;
8d1040e8 743
66f5d6ce 744 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 745 if (!rt0)
66f5d6ce 746 rt0 = leaf;
1da177e4 747
17ecf590
WW
748 /* Double check to make sure fn is not an intermediate node
749 * and fn->leaf does not points to its child's leaf
750 * (This might happen if all routes under fn are deleted from
751 * the tree and fib6_repair_tree() is called on the node.)
752 */
93c2fb25 753 key_plen = rt0->fib6_dst.plen;
17ecf590 754#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
755 if (rt0->fib6_src.plen)
756 key_plen = rt0->fib6_src.plen;
17ecf590
WW
757#endif
758 if (fn->fn_bit != key_plen)
421842ed 759 return net->ipv6.fib6_null_entry;
17ecf590 760
93c2fb25 761 match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
afc154e9 762 &do_rr);
1da177e4 763
afc154e9 764 if (do_rr) {
8fb11a9a 765 struct fib6_info *next = rcu_dereference(rt0->fib6_next);
f11e6659 766
554cfb7e 767 /* no entries matched; do round-robin */
93c2fb25 768 if (!next || next->fib6_metric != rt0->fib6_metric)
8d1040e8 769 next = leaf;
f11e6659 770
66f5d6ce 771 if (next != rt0) {
93c2fb25 772 spin_lock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 773 /* make sure next is not being deleted from the tree */
93c2fb25 774 if (next->fib6_node)
66f5d6ce 775 rcu_assign_pointer(fn->rr_ptr, next);
93c2fb25 776 spin_unlock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 777 }
1da177e4 778 }
1da177e4 779
421842ed 780 return match ? match : net->ipv6.fib6_null_entry;
1da177e4
LT
781}
782
8d1c802b 783static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
8b9df265 784{
93c2fb25 785 return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
8b9df265
MKL
786}
787
70ceb4f5
YH
788#ifdef CONFIG_IPV6_ROUTE_INFO
789int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 790 const struct in6_addr *gwaddr)
70ceb4f5 791{
c346dca1 792 struct net *net = dev_net(dev);
70ceb4f5
YH
793 struct route_info *rinfo = (struct route_info *) opt;
794 struct in6_addr prefix_buf, *prefix;
795 unsigned int pref;
4bed72e4 796 unsigned long lifetime;
8d1c802b 797 struct fib6_info *rt;
70ceb4f5
YH
798
799 if (len < sizeof(struct route_info)) {
800 return -EINVAL;
801 }
802
803 /* Sanity check for prefix_len and length */
804 if (rinfo->length > 3) {
805 return -EINVAL;
806 } else if (rinfo->prefix_len > 128) {
807 return -EINVAL;
808 } else if (rinfo->prefix_len > 64) {
809 if (rinfo->length < 2) {
810 return -EINVAL;
811 }
812 } else if (rinfo->prefix_len > 0) {
813 if (rinfo->length < 1) {
814 return -EINVAL;
815 }
816 }
817
818 pref = rinfo->route_pref;
819 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 820 return -EINVAL;
70ceb4f5 821
4bed72e4 822 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
823
824 if (rinfo->length == 3)
825 prefix = (struct in6_addr *)rinfo->prefix;
826 else {
827 /* this function is safe */
828 ipv6_addr_prefix(&prefix_buf,
829 (struct in6_addr *)rinfo->prefix,
830 rinfo->prefix_len);
831 prefix = &prefix_buf;
832 }
833
f104a567 834 if (rinfo->prefix_len == 0)
afb1d4b5 835 rt = rt6_get_dflt_router(net, gwaddr, dev);
f104a567
DJ
836 else
837 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 838 gwaddr, dev);
70ceb4f5
YH
839
840 if (rt && !lifetime) {
afb1d4b5 841 ip6_del_rt(net, rt);
70ceb4f5
YH
842 rt = NULL;
843 }
844
845 if (!rt && lifetime)
830218c1
DA
846 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
847 dev, pref);
70ceb4f5 848 else if (rt)
93c2fb25
DA
849 rt->fib6_flags = RTF_ROUTEINFO |
850 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
70ceb4f5
YH
851
852 if (rt) {
1716a961 853 if (!addrconf_finite_timeout(lifetime))
14895687 854 fib6_clean_expires(rt);
1716a961 855 else
14895687 856 fib6_set_expires(rt, jiffies + HZ * lifetime);
1716a961 857
93531c67 858 fib6_info_release(rt);
70ceb4f5
YH
859 }
860 return 0;
861}
862#endif
863
ae90d867
DA
864/*
865 * Misc support functions
866 */
867
868/* called with rcu_lock held */
8d1c802b 869static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
ae90d867 870{
5e670d84 871 struct net_device *dev = rt->fib6_nh.nh_dev;
ae90d867 872
93c2fb25 873 if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
ae90d867
DA
874 /* for copies of local routes, dst->dev needs to be the
875 * device if it is a master device, the master device if
876 * device is enslaved, and the loopback as the default
877 */
878 if (netif_is_l3_slave(dev) &&
93c2fb25 879 !rt6_need_strict(&rt->fib6_dst.addr))
ae90d867
DA
880 dev = l3mdev_master_dev_rcu(dev);
881 else if (!netif_is_l3_master(dev))
882 dev = dev_net(dev)->loopback_dev;
883 /* last case is netif_is_l3_master(dev) is true in which
884 * case we want dev returned to be dev
885 */
886 }
887
888 return dev;
889}
890
6edb3c96
DA
891static const int fib6_prop[RTN_MAX + 1] = {
892 [RTN_UNSPEC] = 0,
893 [RTN_UNICAST] = 0,
894 [RTN_LOCAL] = 0,
895 [RTN_BROADCAST] = 0,
896 [RTN_ANYCAST] = 0,
897 [RTN_MULTICAST] = 0,
898 [RTN_BLACKHOLE] = -EINVAL,
899 [RTN_UNREACHABLE] = -EHOSTUNREACH,
900 [RTN_PROHIBIT] = -EACCES,
901 [RTN_THROW] = -EAGAIN,
902 [RTN_NAT] = -EINVAL,
903 [RTN_XRESOLVE] = -EINVAL,
904};
905
906static int ip6_rt_type_to_error(u8 fib6_type)
907{
908 return fib6_prop[fib6_type];
909}
910
8d1c802b 911static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
3b6761d1
DA
912{
913 unsigned short flags = 0;
914
915 if (rt->dst_nocount)
916 flags |= DST_NOCOUNT;
917 if (rt->dst_nopolicy)
918 flags |= DST_NOPOLICY;
919 if (rt->dst_host)
920 flags |= DST_HOST;
921
922 return flags;
923}
924
8d1c802b 925static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96
DA
926{
927 rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
928
929 switch (ort->fib6_type) {
930 case RTN_BLACKHOLE:
931 rt->dst.output = dst_discard_out;
932 rt->dst.input = dst_discard;
933 break;
934 case RTN_PROHIBIT:
935 rt->dst.output = ip6_pkt_prohibit_out;
936 rt->dst.input = ip6_pkt_prohibit;
937 break;
938 case RTN_THROW:
939 case RTN_UNREACHABLE:
940 default:
941 rt->dst.output = ip6_pkt_discard_out;
942 rt->dst.input = ip6_pkt_discard;
943 break;
944 }
945}
946
8d1c802b 947static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96 948{
93c2fb25 949 if (ort->fib6_flags & RTF_REJECT) {
6edb3c96
DA
950 ip6_rt_init_dst_reject(rt, ort);
951 return;
952 }
953
954 rt->dst.error = 0;
955 rt->dst.output = ip6_output;
956
d23c4b63 957 if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) {
6edb3c96 958 rt->dst.input = ip6_input;
93c2fb25 959 } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
6edb3c96
DA
960 rt->dst.input = ip6_mc_input;
961 } else {
962 rt->dst.input = ip6_forward;
963 }
964
965 if (ort->fib6_nh.nh_lwtstate) {
966 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
967 lwtunnel_set_redirect(&rt->dst);
968 }
969
970 rt->dst.lastuse = jiffies;
971}
972
e873e4b9 973/* Caller must already hold reference to @from */
8d1c802b 974static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
ae90d867 975{
ae90d867 976 rt->rt6i_flags &= ~RTF_EXPIRES;
a68886a6 977 rcu_assign_pointer(rt->from, from);
e1255ed4 978 ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
ae90d867
DA
979}
980
e873e4b9 981/* Caller must already hold reference to @ort */
8d1c802b 982static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
ae90d867 983{
dcd1f572
DA
984 struct net_device *dev = fib6_info_nh_dev(ort);
985
6edb3c96
DA
986 ip6_rt_init_dst(rt, ort);
987
93c2fb25 988 rt->rt6i_dst = ort->fib6_dst;
dcd1f572 989 rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
5e670d84 990 rt->rt6i_gateway = ort->fib6_nh.nh_gw;
93c2fb25 991 rt->rt6i_flags = ort->fib6_flags;
ae90d867 992 rt6_set_from(rt, ort);
ae90d867 993#ifdef CONFIG_IPV6_SUBTREES
93c2fb25 994 rt->rt6i_src = ort->fib6_src;
ae90d867 995#endif
ae90d867
DA
996}
997
a3c00e46
MKL
998static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
999 struct in6_addr *saddr)
1000{
66f5d6ce 1001 struct fib6_node *pn, *sn;
a3c00e46
MKL
1002 while (1) {
1003 if (fn->fn_flags & RTN_TL_ROOT)
1004 return NULL;
66f5d6ce
WW
1005 pn = rcu_dereference(fn->parent);
1006 sn = FIB6_SUBTREE(pn);
1007 if (sn && sn != fn)
6454743b 1008 fn = fib6_node_lookup(sn, NULL, saddr);
a3c00e46
MKL
1009 else
1010 fn = pn;
1011 if (fn->fn_flags & RTN_RTINFO)
1012 return fn;
1013 }
1014}
c71099ac 1015
d3843fe5
WW
1016static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
1017 bool null_fallback)
1018{
1019 struct rt6_info *rt = *prt;
1020
1021 if (dst_hold_safe(&rt->dst))
1022 return true;
1023 if (null_fallback) {
1024 rt = net->ipv6.ip6_null_entry;
1025 dst_hold(&rt->dst);
1026 } else {
1027 rt = NULL;
1028 }
1029 *prt = rt;
1030 return false;
1031}
1032
dec9b0e2 1033/* called with rcu_lock held */
8d1c802b 1034static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
dec9b0e2 1035{
3b6761d1 1036 unsigned short flags = fib6_info_dst_flags(rt);
dec9b0e2
DA
1037 struct net_device *dev = rt->fib6_nh.nh_dev;
1038 struct rt6_info *nrt;
1039
e873e4b9
WW
1040 if (!fib6_info_hold_safe(rt))
1041 return NULL;
1042
93531c67 1043 nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
dec9b0e2
DA
1044 if (nrt)
1045 ip6_rt_copy_init(nrt, rt);
e873e4b9
WW
1046 else
1047 fib6_info_release(rt);
dec9b0e2
DA
1048
1049 return nrt;
1050}
1051
8ed67789
DL
1052static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1053 struct fib6_table *table,
b75cc8f9
DA
1054 struct flowi6 *fl6,
1055 const struct sk_buff *skb,
1056 int flags)
1da177e4 1057{
8d1c802b 1058 struct fib6_info *f6i;
1da177e4 1059 struct fib6_node *fn;
23fb93a4 1060 struct rt6_info *rt;
1da177e4 1061
b6cdbc85
DA
1062 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1063 flags &= ~RT6_LOOKUP_F_IFACE;
1064
66f5d6ce 1065 rcu_read_lock();
6454743b 1066 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 1067restart:
23fb93a4
DA
1068 f6i = rcu_dereference(fn->leaf);
1069 if (!f6i) {
1070 f6i = net->ipv6.fib6_null_entry;
66f5d6ce 1071 } else {
23fb93a4 1072 f6i = rt6_device_match(net, f6i, &fl6->saddr,
66f5d6ce 1073 fl6->flowi6_oif, flags);
93c2fb25 1074 if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
3b290a31
DA
1075 f6i = fib6_multipath_select(net, f6i, fl6,
1076 fl6->flowi6_oif, skb,
1077 flags);
66f5d6ce 1078 }
23fb93a4 1079 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1080 fn = fib6_backtrack(fn, &fl6->saddr);
1081 if (fn)
1082 goto restart;
1083 }
2b760fcf 1084
d4bea421 1085 trace_fib6_table_lookup(net, f6i, table, fl6);
d3843fe5 1086
2b760fcf 1087 /* Search through exception table */
23fb93a4
DA
1088 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1089 if (rt) {
dec9b0e2
DA
1090 if (ip6_hold_safe(net, &rt, true))
1091 dst_use_noref(&rt->dst, jiffies);
23fb93a4 1092 } else if (f6i == net->ipv6.fib6_null_entry) {
dec9b0e2
DA
1093 rt = net->ipv6.ip6_null_entry;
1094 dst_hold(&rt->dst);
23fb93a4
DA
1095 } else {
1096 rt = ip6_create_rt_rcu(f6i);
1097 if (!rt) {
1098 rt = net->ipv6.ip6_null_entry;
1099 dst_hold(&rt->dst);
1100 }
dec9b0e2 1101 }
b811580d 1102
66f5d6ce 1103 rcu_read_unlock();
b811580d 1104
c71099ac 1105 return rt;
c71099ac
TG
1106}
1107
67ba4152 1108struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 1109 const struct sk_buff *skb, int flags)
ea6e574e 1110{
b75cc8f9 1111 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
ea6e574e
FW
1112}
1113EXPORT_SYMBOL_GPL(ip6_route_lookup);
1114
9acd9f3a 1115struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
b75cc8f9
DA
1116 const struct in6_addr *saddr, int oif,
1117 const struct sk_buff *skb, int strict)
c71099ac 1118{
4c9483b2
DM
1119 struct flowi6 fl6 = {
1120 .flowi6_oif = oif,
1121 .daddr = *daddr,
c71099ac
TG
1122 };
1123 struct dst_entry *dst;
77d16f45 1124 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 1125
adaa70bb 1126 if (saddr) {
4c9483b2 1127 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
1128 flags |= RT6_LOOKUP_F_HAS_SADDR;
1129 }
1130
b75cc8f9 1131 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
c71099ac
TG
1132 if (dst->error == 0)
1133 return (struct rt6_info *) dst;
1134
1135 dst_release(dst);
1136
1da177e4
LT
1137 return NULL;
1138}
7159039a
YH
1139EXPORT_SYMBOL(rt6_lookup);
1140
c71099ac 1141/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
1142 * It takes new route entry, the addition fails by any reason the
1143 * route is released.
1144 * Caller must hold dst before calling it.
1da177e4
LT
1145 */
1146
8d1c802b 1147static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
333c4301 1148 struct netlink_ext_ack *extack)
1da177e4
LT
1149{
1150 int err;
c71099ac 1151 struct fib6_table *table;
1da177e4 1152
93c2fb25 1153 table = rt->fib6_table;
66f5d6ce 1154 spin_lock_bh(&table->tb6_lock);
d4ead6b3 1155 err = fib6_add(&table->tb6_root, rt, info, extack);
66f5d6ce 1156 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1157
1158 return err;
1159}
1160
8d1c802b 1161int ip6_ins_rt(struct net *net, struct fib6_info *rt)
40e22e8f 1162{
afb1d4b5 1163 struct nl_info info = { .nl_net = net, };
e715b6d3 1164
d4ead6b3 1165 return __ip6_ins_rt(rt, &info, NULL);
40e22e8f
TG
1166}
1167
8d1c802b 1168static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
8b9df265
MKL
1169 const struct in6_addr *daddr,
1170 const struct in6_addr *saddr)
1da177e4 1171{
4832c30d 1172 struct net_device *dev;
1da177e4
LT
1173 struct rt6_info *rt;
1174
1175 /*
1176 * Clone the route.
1177 */
1178
e873e4b9
WW
1179 if (!fib6_info_hold_safe(ort))
1180 return NULL;
1181
4832c30d 1182 dev = ip6_rt_get_dev_rcu(ort);
93531c67 1183 rt = ip6_dst_alloc(dev_net(dev), dev, 0);
e873e4b9
WW
1184 if (!rt) {
1185 fib6_info_release(ort);
83a09abd 1186 return NULL;
e873e4b9 1187 }
83a09abd
MKL
1188
1189 ip6_rt_copy_init(rt, ort);
1190 rt->rt6i_flags |= RTF_CACHE;
83a09abd
MKL
1191 rt->dst.flags |= DST_HOST;
1192 rt->rt6i_dst.addr = *daddr;
1193 rt->rt6i_dst.plen = 128;
1da177e4 1194
83a09abd 1195 if (!rt6_is_gw_or_nonexthop(ort)) {
93c2fb25
DA
1196 if (ort->fib6_dst.plen != 128 &&
1197 ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
83a09abd 1198 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1199#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1200 if (rt->rt6i_src.plen && saddr) {
1201 rt->rt6i_src.addr = *saddr;
1202 rt->rt6i_src.plen = 128;
8b9df265 1203 }
83a09abd 1204#endif
95a9a5ba 1205 }
1da177e4 1206
95a9a5ba
YH
1207 return rt;
1208}
1da177e4 1209
8d1c802b 1210static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
d52d3997 1211{
3b6761d1 1212 unsigned short flags = fib6_info_dst_flags(rt);
4832c30d 1213 struct net_device *dev;
d52d3997
MKL
1214 struct rt6_info *pcpu_rt;
1215
e873e4b9
WW
1216 if (!fib6_info_hold_safe(rt))
1217 return NULL;
1218
4832c30d
DA
1219 rcu_read_lock();
1220 dev = ip6_rt_get_dev_rcu(rt);
93531c67 1221 pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
4832c30d 1222 rcu_read_unlock();
e873e4b9
WW
1223 if (!pcpu_rt) {
1224 fib6_info_release(rt);
d52d3997 1225 return NULL;
e873e4b9 1226 }
d52d3997 1227 ip6_rt_copy_init(pcpu_rt, rt);
d52d3997
MKL
1228 pcpu_rt->rt6i_flags |= RTF_PCPU;
1229 return pcpu_rt;
1230}
1231
66f5d6ce 1232/* It should be called with rcu_read_lock() acquired */
8d1c802b 1233static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
d52d3997 1234{
a73e4195 1235 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1236
1237 p = this_cpu_ptr(rt->rt6i_pcpu);
1238 pcpu_rt = *p;
1239
d4ead6b3
DA
1240 if (pcpu_rt)
1241 ip6_hold_safe(NULL, &pcpu_rt, false);
d3843fe5 1242
a73e4195
MKL
1243 return pcpu_rt;
1244}
1245
afb1d4b5 1246static struct rt6_info *rt6_make_pcpu_route(struct net *net,
8d1c802b 1247 struct fib6_info *rt)
a73e4195
MKL
1248{
1249 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1250
1251 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1252 if (!pcpu_rt) {
9c7370a1
MKL
1253 dst_hold(&net->ipv6.ip6_null_entry->dst);
1254 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1255 }
1256
a94b9367
WW
1257 dst_hold(&pcpu_rt->dst);
1258 p = this_cpu_ptr(rt->rt6i_pcpu);
1259 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1260 BUG_ON(prev);
a94b9367 1261
d52d3997
MKL
1262 return pcpu_rt;
1263}
1264
35732d01
WW
1265/* exception hash table implementation
1266 */
1267static DEFINE_SPINLOCK(rt6_exception_lock);
1268
1269/* Remove rt6_ex from hash table and free the memory
1270 * Caller must hold rt6_exception_lock
1271 */
1272static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1273 struct rt6_exception *rt6_ex)
1274{
65cafef4 1275 struct fib6_info *from;
b2427e67 1276 struct net *net;
81eb8447 1277
35732d01
WW
1278 if (!bucket || !rt6_ex)
1279 return;
b2427e67
CIK
1280
1281 net = dev_net(rt6_ex->rt6i->dst.dev);
65cafef4
PA
1282 net->ipv6.rt6_stats->fib_rt_cache--;
1283
1284 /* purge completely the exception to allow releasing the held resources:
1285 * some [sk] cache may keep the dst around for unlimited time
1286 */
1287 from = rcu_dereference_protected(rt6_ex->rt6i->from,
1288 lockdep_is_held(&rt6_exception_lock));
1289 rcu_assign_pointer(rt6_ex->rt6i->from, NULL);
1290 fib6_info_release(from);
1291 dst_dev_put(&rt6_ex->rt6i->dst);
1292
35732d01 1293 hlist_del_rcu(&rt6_ex->hlist);
77634cc6 1294 dst_release(&rt6_ex->rt6i->dst);
35732d01
WW
1295 kfree_rcu(rt6_ex, rcu);
1296 WARN_ON_ONCE(!bucket->depth);
1297 bucket->depth--;
1298}
1299
1300/* Remove oldest rt6_ex in bucket and free the memory
1301 * Caller must hold rt6_exception_lock
1302 */
1303static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1304{
1305 struct rt6_exception *rt6_ex, *oldest = NULL;
1306
1307 if (!bucket)
1308 return;
1309
1310 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1311 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1312 oldest = rt6_ex;
1313 }
1314 rt6_remove_exception(bucket, oldest);
1315}
1316
1317static u32 rt6_exception_hash(const struct in6_addr *dst,
1318 const struct in6_addr *src)
1319{
1320 static u32 seed __read_mostly;
1321 u32 val;
1322
1323 net_get_random_once(&seed, sizeof(seed));
1324 val = jhash(dst, sizeof(*dst), seed);
1325
1326#ifdef CONFIG_IPV6_SUBTREES
1327 if (src)
1328 val = jhash(src, sizeof(*src), val);
1329#endif
1330 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1331}
1332
1333/* Helper function to find the cached rt in the hash table
1334 * and update bucket pointer to point to the bucket for this
1335 * (daddr, saddr) pair
1336 * Caller must hold rt6_exception_lock
1337 */
1338static struct rt6_exception *
1339__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1340 const struct in6_addr *daddr,
1341 const struct in6_addr *saddr)
1342{
1343 struct rt6_exception *rt6_ex;
1344 u32 hval;
1345
1346 if (!(*bucket) || !daddr)
1347 return NULL;
1348
1349 hval = rt6_exception_hash(daddr, saddr);
1350 *bucket += hval;
1351
1352 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1353 struct rt6_info *rt6 = rt6_ex->rt6i;
1354 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1355
1356#ifdef CONFIG_IPV6_SUBTREES
1357 if (matched && saddr)
1358 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1359#endif
1360 if (matched)
1361 return rt6_ex;
1362 }
1363 return NULL;
1364}
1365
1366/* Helper function to find the cached rt in the hash table
1367 * and update bucket pointer to point to the bucket for this
1368 * (daddr, saddr) pair
1369 * Caller must hold rcu_read_lock()
1370 */
1371static struct rt6_exception *
1372__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1373 const struct in6_addr *daddr,
1374 const struct in6_addr *saddr)
1375{
1376 struct rt6_exception *rt6_ex;
1377 u32 hval;
1378
1379 WARN_ON_ONCE(!rcu_read_lock_held());
1380
1381 if (!(*bucket) || !daddr)
1382 return NULL;
1383
1384 hval = rt6_exception_hash(daddr, saddr);
1385 *bucket += hval;
1386
1387 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1388 struct rt6_info *rt6 = rt6_ex->rt6i;
1389 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1390
1391#ifdef CONFIG_IPV6_SUBTREES
1392 if (matched && saddr)
1393 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1394#endif
1395 if (matched)
1396 return rt6_ex;
1397 }
1398 return NULL;
1399}
1400
8d1c802b 1401static unsigned int fib6_mtu(const struct fib6_info *rt)
d4ead6b3
DA
1402{
1403 unsigned int mtu;
1404
dcd1f572
DA
1405 if (rt->fib6_pmtu) {
1406 mtu = rt->fib6_pmtu;
1407 } else {
1408 struct net_device *dev = fib6_info_nh_dev(rt);
1409 struct inet6_dev *idev;
1410
1411 rcu_read_lock();
1412 idev = __in6_dev_get(dev);
1413 mtu = idev->cnf.mtu6;
1414 rcu_read_unlock();
1415 }
1416
d4ead6b3
DA
1417 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1418
1419 return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1420}
1421
35732d01 1422static int rt6_insert_exception(struct rt6_info *nrt,
8d1c802b 1423 struct fib6_info *ort)
35732d01 1424{
5e670d84 1425 struct net *net = dev_net(nrt->dst.dev);
35732d01
WW
1426 struct rt6_exception_bucket *bucket;
1427 struct in6_addr *src_key = NULL;
1428 struct rt6_exception *rt6_ex;
1429 int err = 0;
1430
35732d01
WW
1431 spin_lock_bh(&rt6_exception_lock);
1432
1433 if (ort->exception_bucket_flushed) {
1434 err = -EINVAL;
1435 goto out;
1436 }
1437
1438 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1439 lockdep_is_held(&rt6_exception_lock));
1440 if (!bucket) {
1441 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1442 GFP_ATOMIC);
1443 if (!bucket) {
1444 err = -ENOMEM;
1445 goto out;
1446 }
1447 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1448 }
1449
1450#ifdef CONFIG_IPV6_SUBTREES
1451 /* rt6i_src.plen != 0 indicates ort is in subtree
1452 * and exception table is indexed by a hash of
1453 * both rt6i_dst and rt6i_src.
1454 * Otherwise, the exception table is indexed by
1455 * a hash of only rt6i_dst.
1456 */
93c2fb25 1457 if (ort->fib6_src.plen)
35732d01
WW
1458 src_key = &nrt->rt6i_src.addr;
1459#endif
f5bbe7ee
WW
1460 /* rt6_mtu_change() might lower mtu on ort.
1461 * Only insert this exception route if its mtu
1462 * is less than ort's mtu value.
1463 */
d4ead6b3 1464 if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
f5bbe7ee
WW
1465 err = -EINVAL;
1466 goto out;
1467 }
60006a48 1468
35732d01
WW
1469 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1470 src_key);
1471 if (rt6_ex)
1472 rt6_remove_exception(bucket, rt6_ex);
1473
1474 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1475 if (!rt6_ex) {
1476 err = -ENOMEM;
1477 goto out;
1478 }
1479 rt6_ex->rt6i = nrt;
1480 rt6_ex->stamp = jiffies;
35732d01
WW
1481 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1482 bucket->depth++;
81eb8447 1483 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1484
1485 if (bucket->depth > FIB6_MAX_DEPTH)
1486 rt6_exception_remove_oldest(bucket);
1487
1488out:
1489 spin_unlock_bh(&rt6_exception_lock);
1490
1491 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1492 if (!err) {
93c2fb25 1493 spin_lock_bh(&ort->fib6_table->tb6_lock);
7aef6859 1494 fib6_update_sernum(net, ort);
93c2fb25 1495 spin_unlock_bh(&ort->fib6_table->tb6_lock);
b886d5f2
PA
1496 fib6_force_start_gc(net);
1497 }
35732d01
WW
1498
1499 return err;
1500}
1501
8d1c802b 1502void rt6_flush_exceptions(struct fib6_info *rt)
35732d01
WW
1503{
1504 struct rt6_exception_bucket *bucket;
1505 struct rt6_exception *rt6_ex;
1506 struct hlist_node *tmp;
1507 int i;
1508
1509 spin_lock_bh(&rt6_exception_lock);
1510 /* Prevent rt6_insert_exception() to recreate the bucket list */
1511 rt->exception_bucket_flushed = 1;
1512
1513 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1514 lockdep_is_held(&rt6_exception_lock));
1515 if (!bucket)
1516 goto out;
1517
1518 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1519 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1520 rt6_remove_exception(bucket, rt6_ex);
1521 WARN_ON_ONCE(bucket->depth);
1522 bucket++;
1523 }
1524
1525out:
1526 spin_unlock_bh(&rt6_exception_lock);
1527}
1528
1529/* Find cached rt in the hash table inside passed in rt
1530 * Caller has to hold rcu_read_lock()
1531 */
8d1c802b 1532static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
1533 struct in6_addr *daddr,
1534 struct in6_addr *saddr)
1535{
1536 struct rt6_exception_bucket *bucket;
1537 struct in6_addr *src_key = NULL;
1538 struct rt6_exception *rt6_ex;
1539 struct rt6_info *res = NULL;
1540
1541 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1542
1543#ifdef CONFIG_IPV6_SUBTREES
1544 /* rt6i_src.plen != 0 indicates rt is in subtree
1545 * and exception table is indexed by a hash of
1546 * both rt6i_dst and rt6i_src.
1547 * Otherwise, the exception table is indexed by
1548 * a hash of only rt6i_dst.
1549 */
93c2fb25 1550 if (rt->fib6_src.plen)
35732d01
WW
1551 src_key = saddr;
1552#endif
1553 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1554
1555 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1556 res = rt6_ex->rt6i;
1557
1558 return res;
1559}
1560
1561/* Remove the passed in cached rt from the hash table that contains it */
23fb93a4 1562static int rt6_remove_exception_rt(struct rt6_info *rt)
35732d01 1563{
35732d01
WW
1564 struct rt6_exception_bucket *bucket;
1565 struct in6_addr *src_key = NULL;
1566 struct rt6_exception *rt6_ex;
8a14e46f 1567 struct fib6_info *from;
35732d01
WW
1568 int err;
1569
091311de 1570 from = rcu_dereference(rt->from);
35732d01 1571 if (!from ||
442d713b 1572 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1573 return -EINVAL;
1574
1575 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1576 return -ENOENT;
1577
1578 spin_lock_bh(&rt6_exception_lock);
1579 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1580 lockdep_is_held(&rt6_exception_lock));
1581#ifdef CONFIG_IPV6_SUBTREES
1582 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1583 * and exception table is indexed by a hash of
1584 * both rt6i_dst and rt6i_src.
1585 * Otherwise, the exception table is indexed by
1586 * a hash of only rt6i_dst.
1587 */
93c2fb25 1588 if (from->fib6_src.plen)
35732d01
WW
1589 src_key = &rt->rt6i_src.addr;
1590#endif
1591 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1592 &rt->rt6i_dst.addr,
1593 src_key);
1594 if (rt6_ex) {
1595 rt6_remove_exception(bucket, rt6_ex);
1596 err = 0;
1597 } else {
1598 err = -ENOENT;
1599 }
1600
1601 spin_unlock_bh(&rt6_exception_lock);
1602 return err;
1603}
1604
1605/* Find rt6_ex which contains the passed in rt cache and
1606 * refresh its stamp
1607 */
1608static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1609{
35732d01
WW
1610 struct rt6_exception_bucket *bucket;
1611 struct in6_addr *src_key = NULL;
1612 struct rt6_exception *rt6_ex;
1f1cdcc0 1613 struct fib6_info *from;
35732d01
WW
1614
1615 rcu_read_lock();
1f1cdcc0
PA
1616 from = rcu_dereference(rt->from);
1617 if (!from || !(rt->rt6i_flags & RTF_CACHE))
1618 goto unlock;
1619
35732d01
WW
1620 bucket = rcu_dereference(from->rt6i_exception_bucket);
1621
1622#ifdef CONFIG_IPV6_SUBTREES
1623 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1624 * and exception table is indexed by a hash of
1625 * both rt6i_dst and rt6i_src.
1626 * Otherwise, the exception table is indexed by
1627 * a hash of only rt6i_dst.
1628 */
93c2fb25 1629 if (from->fib6_src.plen)
35732d01
WW
1630 src_key = &rt->rt6i_src.addr;
1631#endif
1632 rt6_ex = __rt6_find_exception_rcu(&bucket,
1633 &rt->rt6i_dst.addr,
1634 src_key);
1635 if (rt6_ex)
1636 rt6_ex->stamp = jiffies;
1637
1f1cdcc0 1638unlock:
35732d01
WW
1639 rcu_read_unlock();
1640}
1641
e9fa1495
SB
1642static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1643 struct rt6_info *rt, int mtu)
1644{
1645 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1646 * lowest MTU in the path: always allow updating the route PMTU to
1647 * reflect PMTU decreases.
1648 *
1649 * If the new MTU is higher, and the route PMTU is equal to the local
1650 * MTU, this means the old MTU is the lowest in the path, so allow
1651 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1652 * handle this.
1653 */
1654
1655 if (dst_mtu(&rt->dst) >= mtu)
1656 return true;
1657
1658 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1659 return true;
1660
1661 return false;
1662}
1663
1664static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
8d1c802b 1665 struct fib6_info *rt, int mtu)
f5bbe7ee
WW
1666{
1667 struct rt6_exception_bucket *bucket;
1668 struct rt6_exception *rt6_ex;
1669 int i;
1670
1671 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1672 lockdep_is_held(&rt6_exception_lock));
1673
e9fa1495
SB
1674 if (!bucket)
1675 return;
1676
1677 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1678 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1679 struct rt6_info *entry = rt6_ex->rt6i;
1680
1681 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
d4ead6b3 1682 * route), the metrics of its rt->from have already
e9fa1495
SB
1683 * been updated.
1684 */
d4ead6b3 1685 if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
e9fa1495 1686 rt6_mtu_change_route_allowed(idev, entry, mtu))
d4ead6b3 1687 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
f5bbe7ee 1688 }
e9fa1495 1689 bucket++;
f5bbe7ee
WW
1690 }
1691}
1692
b16cb459
WW
1693#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1694
8d1c802b 1695static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
b16cb459
WW
1696 struct in6_addr *gateway)
1697{
1698 struct rt6_exception_bucket *bucket;
1699 struct rt6_exception *rt6_ex;
1700 struct hlist_node *tmp;
1701 int i;
1702
1703 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1704 return;
1705
1706 spin_lock_bh(&rt6_exception_lock);
1707 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1708 lockdep_is_held(&rt6_exception_lock));
1709
1710 if (bucket) {
1711 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1712 hlist_for_each_entry_safe(rt6_ex, tmp,
1713 &bucket->chain, hlist) {
1714 struct rt6_info *entry = rt6_ex->rt6i;
1715
1716 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1717 RTF_CACHE_GATEWAY &&
1718 ipv6_addr_equal(gateway,
1719 &entry->rt6i_gateway)) {
1720 rt6_remove_exception(bucket, rt6_ex);
1721 }
1722 }
1723 bucket++;
1724 }
1725 }
1726
1727 spin_unlock_bh(&rt6_exception_lock);
1728}
1729
c757faa8
WW
1730static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1731 struct rt6_exception *rt6_ex,
1732 struct fib6_gc_args *gc_args,
1733 unsigned long now)
1734{
1735 struct rt6_info *rt = rt6_ex->rt6i;
1736
1859bac0
PA
1737 /* we are pruning and obsoleting aged-out and non gateway exceptions
1738 * even if others have still references to them, so that on next
1739 * dst_check() such references can be dropped.
1740 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1741 * expired, independently from their aging, as per RFC 8201 section 4
1742 */
31afeb42
WW
1743 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1744 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1745 RT6_TRACE("aging clone %p\n", rt);
1746 rt6_remove_exception(bucket, rt6_ex);
1747 return;
1748 }
1749 } else if (time_after(jiffies, rt->dst.expires)) {
1750 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1751 rt6_remove_exception(bucket, rt6_ex);
1752 return;
31afeb42
WW
1753 }
1754
1755 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1756 struct neighbour *neigh;
1757 __u8 neigh_flags = 0;
1758
1bfa26ff
ED
1759 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1760 if (neigh)
c757faa8 1761 neigh_flags = neigh->flags;
1bfa26ff 1762
c757faa8
WW
1763 if (!(neigh_flags & NTF_ROUTER)) {
1764 RT6_TRACE("purging route %p via non-router but gateway\n",
1765 rt);
1766 rt6_remove_exception(bucket, rt6_ex);
1767 return;
1768 }
1769 }
31afeb42 1770
c757faa8
WW
1771 gc_args->more++;
1772}
1773
8d1c802b 1774void rt6_age_exceptions(struct fib6_info *rt,
c757faa8
WW
1775 struct fib6_gc_args *gc_args,
1776 unsigned long now)
1777{
1778 struct rt6_exception_bucket *bucket;
1779 struct rt6_exception *rt6_ex;
1780 struct hlist_node *tmp;
1781 int i;
1782
1783 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1784 return;
1785
1bfa26ff
ED
1786 rcu_read_lock_bh();
1787 spin_lock(&rt6_exception_lock);
c757faa8
WW
1788 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1789 lockdep_is_held(&rt6_exception_lock));
1790
1791 if (bucket) {
1792 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1793 hlist_for_each_entry_safe(rt6_ex, tmp,
1794 &bucket->chain, hlist) {
1795 rt6_age_examine_exception(bucket, rt6_ex,
1796 gc_args, now);
1797 }
1798 bucket++;
1799 }
1800 }
1bfa26ff
ED
1801 spin_unlock(&rt6_exception_lock);
1802 rcu_read_unlock_bh();
c757faa8
WW
1803}
1804
1d053da9
DA
1805/* must be called with rcu lock held */
1806struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
1807 int oif, struct flowi6 *fl6, int strict)
1da177e4 1808{
367efcb9 1809 struct fib6_node *fn, *saved_fn;
8d1c802b 1810 struct fib6_info *f6i;
1da177e4 1811
6454743b 1812 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1813 saved_fn = fn;
1da177e4 1814
ca254490
DA
1815 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1816 oif = 0;
1817
a3c00e46 1818redo_rt6_select:
23fb93a4 1819 f6i = rt6_select(net, fn, oif, strict);
23fb93a4 1820 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1821 fn = fib6_backtrack(fn, &fl6->saddr);
1822 if (fn)
1823 goto redo_rt6_select;
367efcb9
MKL
1824 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1825 /* also consider unreachable route */
1826 strict &= ~RT6_LOOKUP_F_REACHABLE;
1827 fn = saved_fn;
1828 goto redo_rt6_select;
367efcb9 1829 }
a3c00e46
MKL
1830 }
1831
d4bea421 1832 trace_fib6_table_lookup(net, f6i, table, fl6);
fb9de91e 1833
1d053da9
DA
1834 return f6i;
1835}
1836
1837struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1838 int oif, struct flowi6 *fl6,
1839 const struct sk_buff *skb, int flags)
1840{
1841 struct fib6_info *f6i;
1842 struct rt6_info *rt;
1843 int strict = 0;
1844
1845 strict |= flags & RT6_LOOKUP_F_IFACE;
1846 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1847 if (net->ipv6.devconf_all->forwarding == 0)
1848 strict |= RT6_LOOKUP_F_REACHABLE;
1849
1850 rcu_read_lock();
1851
1852 f6i = fib6_table_lookup(net, table, oif, fl6, strict);
1853 if (f6i->fib6_nsiblings)
1854 f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
1855
23fb93a4 1856 if (f6i == net->ipv6.fib6_null_entry) {
421842ed 1857 rt = net->ipv6.ip6_null_entry;
66f5d6ce 1858 rcu_read_unlock();
d3843fe5 1859 dst_hold(&rt->dst);
d3843fe5 1860 return rt;
23fb93a4
DA
1861 }
1862
1863 /*Search through exception table */
1864 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1865 if (rt) {
d4ead6b3 1866 if (ip6_hold_safe(net, &rt, true))
d3843fe5 1867 dst_use_noref(&rt->dst, jiffies);
d4ead6b3 1868
66f5d6ce 1869 rcu_read_unlock();
d52d3997 1870 return rt;
3da59bd9 1871 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
93c2fb25 1872 !(f6i->fib6_flags & RTF_GATEWAY))) {
3da59bd9
MKL
1873 /* Create a RTF_CACHE clone which will not be
1874 * owned by the fib6 tree. It is for the special case where
1875 * the daddr in the skb during the neighbor look-up is different
1876 * from the fl6->daddr used to look-up route here.
1877 */
3da59bd9
MKL
1878 struct rt6_info *uncached_rt;
1879
23fb93a4 1880 uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
d52d3997 1881
4d85cd0c 1882 rcu_read_unlock();
c71099ac 1883
1cfb71ee
WW
1884 if (uncached_rt) {
1885 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1886 * No need for another dst_hold()
1887 */
8d0b94af 1888 rt6_uncached_list_add(uncached_rt);
81eb8447 1889 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1890 } else {
3da59bd9 1891 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1892 dst_hold(&uncached_rt->dst);
1893 }
b811580d 1894
3da59bd9 1895 return uncached_rt;
d52d3997
MKL
1896 } else {
1897 /* Get a percpu copy */
1898
1899 struct rt6_info *pcpu_rt;
1900
951f788a 1901 local_bh_disable();
23fb93a4 1902 pcpu_rt = rt6_get_pcpu_route(f6i);
d52d3997 1903
93531c67
DA
1904 if (!pcpu_rt)
1905 pcpu_rt = rt6_make_pcpu_route(net, f6i);
1906
951f788a
ED
1907 local_bh_enable();
1908 rcu_read_unlock();
d4bea421 1909
d52d3997
MKL
1910 return pcpu_rt;
1911 }
1da177e4 1912}
9ff74384 1913EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1914
b75cc8f9
DA
1915static struct rt6_info *ip6_pol_route_input(struct net *net,
1916 struct fib6_table *table,
1917 struct flowi6 *fl6,
1918 const struct sk_buff *skb,
1919 int flags)
4acad72d 1920{
b75cc8f9 1921 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
4acad72d
PE
1922}
1923
d409b847
MB
1924struct dst_entry *ip6_route_input_lookup(struct net *net,
1925 struct net_device *dev,
b75cc8f9
DA
1926 struct flowi6 *fl6,
1927 const struct sk_buff *skb,
1928 int flags)
72331bc0
SL
1929{
1930 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1931 flags |= RT6_LOOKUP_F_IFACE;
1932
b75cc8f9 1933 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
72331bc0 1934}
d409b847 1935EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1936
23aebdac 1937static void ip6_multipath_l3_keys(const struct sk_buff *skb,
5e5d6fed
RP
1938 struct flow_keys *keys,
1939 struct flow_keys *flkeys)
23aebdac
JS
1940{
1941 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1942 const struct ipv6hdr *key_iph = outer_iph;
5e5d6fed 1943 struct flow_keys *_flkeys = flkeys;
23aebdac
JS
1944 const struct ipv6hdr *inner_iph;
1945 const struct icmp6hdr *icmph;
1946 struct ipv6hdr _inner_iph;
cea67a2d 1947 struct icmp6hdr _icmph;
23aebdac
JS
1948
1949 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1950 goto out;
1951
cea67a2d
ED
1952 icmph = skb_header_pointer(skb, skb_transport_offset(skb),
1953 sizeof(_icmph), &_icmph);
1954 if (!icmph)
1955 goto out;
1956
23aebdac
JS
1957 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1958 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1959 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1960 icmph->icmp6_type != ICMPV6_PARAMPROB)
1961 goto out;
1962
1963 inner_iph = skb_header_pointer(skb,
1964 skb_transport_offset(skb) + sizeof(*icmph),
1965 sizeof(_inner_iph), &_inner_iph);
1966 if (!inner_iph)
1967 goto out;
1968
1969 key_iph = inner_iph;
5e5d6fed 1970 _flkeys = NULL;
23aebdac 1971out:
5e5d6fed
RP
1972 if (_flkeys) {
1973 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
1974 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
1975 keys->tags.flow_label = _flkeys->tags.flow_label;
1976 keys->basic.ip_proto = _flkeys->basic.ip_proto;
1977 } else {
1978 keys->addrs.v6addrs.src = key_iph->saddr;
1979 keys->addrs.v6addrs.dst = key_iph->daddr;
fa1be7e0 1980 keys->tags.flow_label = ip6_flowlabel(key_iph);
5e5d6fed
RP
1981 keys->basic.ip_proto = key_iph->nexthdr;
1982 }
23aebdac
JS
1983}
1984
1985/* if skb is set it will be used and fl6 can be NULL */
b4bac172
DA
1986u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1987 const struct sk_buff *skb, struct flow_keys *flkeys)
23aebdac
JS
1988{
1989 struct flow_keys hash_keys;
9a2a537a 1990 u32 mhash;
23aebdac 1991
bbfa047a 1992 switch (ip6_multipath_hash_policy(net)) {
b4bac172
DA
1993 case 0:
1994 memset(&hash_keys, 0, sizeof(hash_keys));
1995 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1996 if (skb) {
1997 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
1998 } else {
1999 hash_keys.addrs.v6addrs.src = fl6->saddr;
2000 hash_keys.addrs.v6addrs.dst = fl6->daddr;
fa1be7e0 2001 hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
b4bac172
DA
2002 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2003 }
2004 break;
2005 case 1:
2006 if (skb) {
2007 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2008 struct flow_keys keys;
2009
2010 /* short-circuit if we already have L4 hash present */
2011 if (skb->l4_hash)
2012 return skb_get_hash_raw(skb) >> 1;
2013
2014 memset(&hash_keys, 0, sizeof(hash_keys));
2015
2016 if (!flkeys) {
2017 skb_flow_dissect_flow_keys(skb, &keys, flag);
2018 flkeys = &keys;
2019 }
2020 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2021 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2022 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2023 hash_keys.ports.src = flkeys->ports.src;
2024 hash_keys.ports.dst = flkeys->ports.dst;
2025 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2026 } else {
2027 memset(&hash_keys, 0, sizeof(hash_keys));
2028 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2029 hash_keys.addrs.v6addrs.src = fl6->saddr;
2030 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2031 hash_keys.ports.src = fl6->fl6_sport;
2032 hash_keys.ports.dst = fl6->fl6_dport;
2033 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2034 }
2035 break;
23aebdac 2036 }
9a2a537a 2037 mhash = flow_hash_from_keys(&hash_keys);
23aebdac 2038
9a2a537a 2039 return mhash >> 1;
23aebdac
JS
2040}
2041
c71099ac
TG
2042void ip6_route_input(struct sk_buff *skb)
2043{
b71d1d42 2044 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 2045 struct net *net = dev_net(skb->dev);
adaa70bb 2046 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 2047 struct ip_tunnel_info *tun_info;
4c9483b2 2048 struct flowi6 fl6 = {
e0d56fdd 2049 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
2050 .daddr = iph->daddr,
2051 .saddr = iph->saddr,
6502ca52 2052 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
2053 .flowi6_mark = skb->mark,
2054 .flowi6_proto = iph->nexthdr,
c71099ac 2055 };
5e5d6fed 2056 struct flow_keys *flkeys = NULL, _flkeys;
adaa70bb 2057
904af04d 2058 tun_info = skb_tunnel_info(skb);
46fa062a 2059 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 2060 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
5e5d6fed
RP
2061
2062 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2063 flkeys = &_flkeys;
2064
23aebdac 2065 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
b4bac172 2066 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
06e9d040 2067 skb_dst_drop(skb);
b75cc8f9
DA
2068 skb_dst_set(skb,
2069 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
c71099ac
TG
2070}
2071
b75cc8f9
DA
2072static struct rt6_info *ip6_pol_route_output(struct net *net,
2073 struct fib6_table *table,
2074 struct flowi6 *fl6,
2075 const struct sk_buff *skb,
2076 int flags)
1da177e4 2077{
b75cc8f9 2078 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
c71099ac
TG
2079}
2080
6f21c96a
PA
2081struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2082 struct flowi6 *fl6, int flags)
c71099ac 2083{
d46a9d67 2084 bool any_src;
c71099ac 2085
3ede0bbc
RS
2086 if (ipv6_addr_type(&fl6->daddr) &
2087 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
4c1feac5
DA
2088 struct dst_entry *dst;
2089
2090 dst = l3mdev_link_scope_lookup(net, fl6);
2091 if (dst)
2092 return dst;
2093 }
ca254490 2094
1fb9489b 2095 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 2096
d46a9d67 2097 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 2098 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 2099 (fl6->flowi6_oif && any_src))
77d16f45 2100 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 2101
d46a9d67 2102 if (!any_src)
adaa70bb 2103 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
2104 else if (sk)
2105 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 2106
b75cc8f9 2107 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
1da177e4 2108}
6f21c96a 2109EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 2110
2774c131 2111struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2112{
5c1e6aa3 2113 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 2114 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
2115 struct dst_entry *new = NULL;
2116
1dbe3252 2117 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 2118 DST_OBSOLETE_DEAD, 0);
14e50e57 2119 if (rt) {
0a1f5962 2120 rt6_info_init(rt);
81eb8447 2121 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 2122
0a1f5962 2123 new = &rt->dst;
14e50e57 2124 new->__use = 1;
352e512c 2125 new->input = dst_discard;
ede2059d 2126 new->output = dst_discard_out;
14e50e57 2127
0a1f5962 2128 dst_copy_metrics(new, &ort->dst);
14e50e57 2129
1dbe3252 2130 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 2131 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 2132 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
2133
2134 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2135#ifdef CONFIG_IPV6_SUBTREES
2136 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2137#endif
14e50e57
DM
2138 }
2139
69ead7af
DM
2140 dst_release(dst_orig);
2141 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 2142}
14e50e57 2143
1da177e4
LT
2144/*
2145 * Destination cache support functions
2146 */
2147
8d1c802b 2148static bool fib6_check(struct fib6_info *f6i, u32 cookie)
4b32b5ad 2149{
93531c67
DA
2150 u32 rt_cookie = 0;
2151
8ae86971 2152 if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
93531c67
DA
2153 return false;
2154
2155 if (fib6_check_expired(f6i))
2156 return false;
2157
2158 return true;
4b32b5ad
MKL
2159}
2160
a68886a6
DA
2161static struct dst_entry *rt6_check(struct rt6_info *rt,
2162 struct fib6_info *from,
2163 u32 cookie)
3da59bd9 2164{
36143645 2165 u32 rt_cookie = 0;
c5cff856 2166
a68886a6 2167 if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
93531c67 2168 rt_cookie != cookie)
3da59bd9
MKL
2169 return NULL;
2170
2171 if (rt6_check_expired(rt))
2172 return NULL;
2173
2174 return &rt->dst;
2175}
2176
a68886a6
DA
2177static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2178 struct fib6_info *from,
2179 u32 cookie)
3da59bd9 2180{
5973fb1e
MKL
2181 if (!__rt6_check_expired(rt) &&
2182 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
a68886a6 2183 fib6_check(from, cookie))
3da59bd9
MKL
2184 return &rt->dst;
2185 else
2186 return NULL;
2187}
2188
1da177e4
LT
2189static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2190{
a87b7dc9 2191 struct dst_entry *dst_ret;
a68886a6 2192 struct fib6_info *from;
1da177e4
LT
2193 struct rt6_info *rt;
2194
a87b7dc9
DA
2195 rt = container_of(dst, struct rt6_info, dst);
2196
2197 rcu_read_lock();
1da177e4 2198
6f3118b5
ND
2199 /* All IPV6 dsts are created with ->obsolete set to the value
2200 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2201 * into this function always.
2202 */
e3bc10bd 2203
a68886a6 2204 from = rcu_dereference(rt->from);
4b32b5ad 2205
a68886a6
DA
2206 if (from && (rt->rt6i_flags & RTF_PCPU ||
2207 unlikely(!list_empty(&rt->rt6i_uncached))))
2208 dst_ret = rt6_dst_from_check(rt, from, cookie);
3da59bd9 2209 else
a68886a6 2210 dst_ret = rt6_check(rt, from, cookie);
a87b7dc9
DA
2211
2212 rcu_read_unlock();
2213
2214 return dst_ret;
1da177e4
LT
2215}
2216
2217static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2218{
2219 struct rt6_info *rt = (struct rt6_info *) dst;
2220
2221 if (rt) {
54c1a859 2222 if (rt->rt6i_flags & RTF_CACHE) {
c3c14da0 2223 rcu_read_lock();
54c1a859 2224 if (rt6_check_expired(rt)) {
93531c67 2225 rt6_remove_exception_rt(rt);
54c1a859
YH
2226 dst = NULL;
2227 }
c3c14da0 2228 rcu_read_unlock();
54c1a859 2229 } else {
1da177e4 2230 dst_release(dst);
54c1a859
YH
2231 dst = NULL;
2232 }
1da177e4 2233 }
54c1a859 2234 return dst;
1da177e4
LT
2235}
2236
2237static void ip6_link_failure(struct sk_buff *skb)
2238{
2239 struct rt6_info *rt;
2240
3ffe533c 2241 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2242
adf30907 2243 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2244 if (rt) {
8a14e46f 2245 rcu_read_lock();
1eb4f758 2246 if (rt->rt6i_flags & RTF_CACHE) {
761f6026 2247 rt6_remove_exception_rt(rt);
c5cff856 2248 } else {
a68886a6 2249 struct fib6_info *from;
c5cff856
WW
2250 struct fib6_node *fn;
2251
a68886a6
DA
2252 from = rcu_dereference(rt->from);
2253 if (from) {
2254 fn = rcu_dereference(from->fib6_node);
2255 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2256 fn->fn_sernum = -1;
2257 }
1eb4f758 2258 }
8a14e46f 2259 rcu_read_unlock();
1da177e4
LT
2260 }
2261}
2262
6a3e030f
DA
2263static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2264{
a68886a6
DA
2265 if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2266 struct fib6_info *from;
2267
2268 rcu_read_lock();
2269 from = rcu_dereference(rt0->from);
2270 if (from)
2271 rt0->dst.expires = from->expires;
2272 rcu_read_unlock();
2273 }
6a3e030f
DA
2274
2275 dst_set_expires(&rt0->dst, timeout);
2276 rt0->rt6i_flags |= RTF_EXPIRES;
2277}
2278
45e4fd26
MKL
2279static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2280{
2281 struct net *net = dev_net(rt->dst.dev);
2282
d4ead6b3 2283 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
45e4fd26 2284 rt->rt6i_flags |= RTF_MODIFIED;
45e4fd26
MKL
2285 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2286}
2287
0d3f6d29
MKL
2288static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2289{
a68886a6
DA
2290 bool from_set;
2291
2292 rcu_read_lock();
2293 from_set = !!rcu_dereference(rt->from);
2294 rcu_read_unlock();
2295
0d3f6d29 2296 return !(rt->rt6i_flags & RTF_CACHE) &&
a68886a6 2297 (rt->rt6i_flags & RTF_PCPU || from_set);
0d3f6d29
MKL
2298}
2299
45e4fd26
MKL
2300static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2301 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2302{
0dec879f 2303 const struct in6_addr *daddr, *saddr;
67ba4152 2304 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2305
19bda36c
XL
2306 if (dst_metric_locked(dst, RTAX_MTU))
2307 return;
2308
0dec879f
JA
2309 if (iph) {
2310 daddr = &iph->daddr;
2311 saddr = &iph->saddr;
2312 } else if (sk) {
2313 daddr = &sk->sk_v6_daddr;
2314 saddr = &inet6_sk(sk)->saddr;
2315 } else {
2316 daddr = NULL;
2317 saddr = NULL;
2318 }
2319 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2320 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2321 if (mtu >= dst_mtu(dst))
2322 return;
9d289715 2323
0d3f6d29 2324 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2325 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2326 /* update rt6_ex->stamp for cache */
2327 if (rt6->rt6i_flags & RTF_CACHE)
2328 rt6_update_exception_stamp_rt(rt6);
0dec879f 2329 } else if (daddr) {
a68886a6 2330 struct fib6_info *from;
45e4fd26
MKL
2331 struct rt6_info *nrt6;
2332
4d85cd0c 2333 rcu_read_lock();
a68886a6
DA
2334 from = rcu_dereference(rt6->from);
2335 nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
45e4fd26
MKL
2336 if (nrt6) {
2337 rt6_do_update_pmtu(nrt6, mtu);
a68886a6 2338 if (rt6_insert_exception(nrt6, from))
2b760fcf 2339 dst_release_immediate(&nrt6->dst);
45e4fd26 2340 }
a68886a6 2341 rcu_read_unlock();
1da177e4
LT
2342 }
2343}
2344
45e4fd26
MKL
2345static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2346 struct sk_buff *skb, u32 mtu)
2347{
2348 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2349}
2350
42ae66c8 2351void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2352 int oif, u32 mark, kuid_t uid)
81aded24
DM
2353{
2354 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2355 struct dst_entry *dst;
dc92095d
2356 struct flowi6 fl6 = {
2357 .flowi6_oif = oif,
2358 .flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
2359 .daddr = iph->daddr,
2360 .saddr = iph->saddr,
2361 .flowlabel = ip6_flowinfo(iph),
2362 .flowi6_uid = uid,
2363 };
81aded24
DM
2364
2365 dst = ip6_route_output(net, NULL, &fl6);
2366 if (!dst->error)
45e4fd26 2367 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2368 dst_release(dst);
2369}
2370EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2371
2372void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2373{
7ddacfa5 2374 int oif = sk->sk_bound_dev_if;
33c162a9
MKL
2375 struct dst_entry *dst;
2376
7ddacfa5
DA
2377 if (!oif && skb->dev)
2378 oif = l3mdev_master_ifindex(skb->dev);
2379
2380 ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2381
2382 dst = __sk_dst_get(sk);
2383 if (!dst || !dst->obsolete ||
2384 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2385 return;
2386
2387 bh_lock_sock(sk);
2388 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2389 ip6_datagram_dst_update(sk, false);
2390 bh_unlock_sock(sk);
81aded24
DM
2391}
2392EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2393
7d6850f7
AK
2394void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2395 const struct flowi6 *fl6)
2396{
2397#ifdef CONFIG_IPV6_SUBTREES
2398 struct ipv6_pinfo *np = inet6_sk(sk);
2399#endif
2400
2401 ip6_dst_store(sk, dst,
2402 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2403 &sk->sk_v6_daddr : NULL,
2404#ifdef CONFIG_IPV6_SUBTREES
2405 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2406 &np->saddr :
2407#endif
2408 NULL);
2409}
2410
b55b76b2
DJ
2411/* Handle redirects */
2412struct ip6rd_flowi {
2413 struct flowi6 fl6;
2414 struct in6_addr gateway;
2415};
2416
/* Table lookup callback used for redirects.
 *
 * @fl6 is really the fl6 member of a struct ip6rd_flowi; the cast below
 * recovers the gateway address that accompanies the flow.  Walks the fib
 * node matching the flow (backtracking when nothing matches) and returns
 * a held rt6_info: a cached exception route whose gateway matches, the
 * null entry when a reject route is hit, or a route created from the
 * selected fib entry.
 */
2417static struct rt6_info *__ip6_route_redirect(struct net *net,
2418 struct fib6_table *table,
2419 struct flowi6 *fl6,
b75cc8f9 2420 const struct sk_buff *skb,
b55b76b2
DJ
2421 int flags)
2422{
2423 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
23fb93a4 2424 struct rt6_info *ret = NULL, *rt_cache;
8d1c802b 2425 struct fib6_info *rt;
b55b76b2
DJ
2426 struct fib6_node *fn;
2427
2428 /* Get the "current" route for this destination and
67c408cf 2429 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2430 *
2431 * RFC 4861 specifies that redirects should only be
2432 * accepted if they come from the nexthop to the target.
2433 * Due to the way the routes are chosen, this notion
2434 * is a bit fuzzy and one might need to check all possible
2435 * routes.
2436 */
2437
66f5d6ce 2438 rcu_read_lock();
6454743b 2439 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
b55b76b2 2440restart:
/* NOTE(review): the loop macro is given only fn, yet the body uses rt;
 * presumably the macro iterates through the local named rt - confirm
 * against its definition before renaming that variable.
 */
66f5d6ce 2441 for_each_fib6_node_rt_rcu(fn) {
5e670d84 2442 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c 2443 continue;
14895687 2444 if (fib6_check_expired(rt))
b55b76b2 2445 continue;
/* a reject route ends the walk; it is turned into the null entry below */
93c2fb25 2446 if (rt->fib6_flags & RTF_REJECT)
b55b76b2 2447 break;
93c2fb25 2448 if (!(rt->fib6_flags & RTF_GATEWAY))
b55b76b2 2449 continue;
5e670d84 2450 if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
b55b76b2 2451 continue;
2b760fcf
WW
2452 /* rt_cache's gateway might be different from its 'parent'
2453 * in the case of an ip redirect.
2454 * So we keep searching in the exception table if the gateway
2455 * is different.
2456 */
5e670d84 2457 if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
2b760fcf
WW
2458 rt_cache = rt6_find_cached_rt(rt,
2459 &fl6->daddr,
2460 &fl6->saddr);
2461 if (rt_cache &&
2462 ipv6_addr_equal(&rdfl->gateway,
2463 &rt_cache->rt6i_gateway)) {
23fb93a4 2464 ret = rt_cache;
2b760fcf
WW
2465 break;
2466 }
b55b76b2 2467 continue;
2b760fcf 2468 }
b55b76b2
DJ
/* the fib entry's own gateway matches the redirecting router */
2469 break;
2470 }
2471
2472 if (!rt)
421842ed 2473 rt = net->ipv6.fib6_null_entry;
93c2fb25 2474 else if (rt->fib6_flags & RTF_REJECT) {
23fb93a4 2475 ret = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2476 goto out;
2477 }
2478
/* nothing matched in this node: backtrack and retry while a node remains */
421842ed 2479 if (rt == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
2480 fn = fib6_backtrack(fn, &fl6->saddr);
2481 if (fn)
2482 goto restart;
b55b76b2 2483 }
a3c00e46 2484
b0a1ba59 2485out:
/* hold the rt6_info picked above, or create one from the fib entry */
23fb93a4 2486 if (ret)
e873e4b9 2487 ip6_hold_safe(net, &ret, true);
23fb93a4
DA
2488 else
2489 ret = ip6_create_rt_rcu(rt);
b55b76b2 2490
66f5d6ce 2491 rcu_read_unlock();
b55b76b2 2492
b65f164d 2493 trace_fib6_table_lookup(net, rt, table, fl6);
23fb93a4 2494 return ret;
b55b76b2
DJ
2495};
2496
2497static struct dst_entry *ip6_route_redirect(struct net *net,
b75cc8f9
DA
2498 const struct flowi6 *fl6,
2499 const struct sk_buff *skb,
2500 const struct in6_addr *gateway)
b55b76b2
DJ
2501{
2502 int flags = RT6_LOOKUP_F_HAS_SADDR;
2503 struct ip6rd_flowi rdfl;
2504
2505 rdfl.fl6 = *fl6;
2506 rdfl.gateway = *gateway;
2507
b75cc8f9 2508 return fib6_rule_lookup(net, &rdfl.fl6, skb,
b55b76b2
DJ
2509 flags, __ip6_route_redirect);
2510}
2511
e2d118a1
LC
2512void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2513 kuid_t uid)
3a5ad2ee
DM
2514{
2515 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2516 struct dst_entry *dst;
1f7f10ac
2517 struct flowi6 fl6 = {
2518 .flowi6_iif = LOOPBACK_IFINDEX,
2519 .flowi6_oif = oif,
2520 .flowi6_mark = mark,
2521 .daddr = iph->daddr,
2522 .saddr = iph->saddr,
2523 .flowlabel = ip6_flowinfo(iph),
2524 .flowi6_uid = uid,
2525 };
3a5ad2ee 2526
b75cc8f9 2527 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
b55b76b2 2528 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2529 dst_release(dst);
2530}
2531EXPORT_SYMBOL_GPL(ip6_redirect);
2532
d456336d 2533void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
c92a59ec
DJ
2534{
2535 const struct ipv6hdr *iph = ipv6_hdr(skb);
2536 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2537 struct dst_entry *dst;
0b26fb17
2538 struct flowi6 fl6 = {
2539 .flowi6_iif = LOOPBACK_IFINDEX,
2540 .flowi6_oif = oif,
0b26fb17
2541 .daddr = msg->dest,
2542 .saddr = iph->daddr,
2543 .flowi6_uid = sock_net_uid(net, NULL),
2544 };
c92a59ec 2545
b75cc8f9 2546 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
b55b76b2 2547 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2548 dst_release(dst);
2549}
2550
3a5ad2ee
DM
2551void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2552{
e2d118a1
LC
2553 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2554 sk->sk_uid);
3a5ad2ee
DM
2555}
2556EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2557
0dbaee3b 2558static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2559{
0dbaee3b
DM
2560 struct net_device *dev = dst->dev;
2561 unsigned int mtu = dst_mtu(dst);
2562 struct net *net = dev_net(dev);
2563
1da177e4
LT
2564 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2565
5578689a
DL
2566 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2567 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2568
2569 /*
1ab1457c
YH
2570 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2571 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2572 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2573 * rely only on pmtu discovery"
2574 */
2575 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2576 mtu = IPV6_MAXPLEN;
2577 return mtu;
2578}
2579
ebb762f2 2580static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2581{
d33e4553 2582 struct inet6_dev *idev;
d4ead6b3 2583 unsigned int mtu;
4b32b5ad
MKL
2584
2585 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2586 if (mtu)
30f78d8e 2587 goto out;
618f9bc7
SK
2588
2589 mtu = IPV6_MIN_MTU;
d33e4553
DM
2590
2591 rcu_read_lock();
2592 idev = __in6_dev_get(dst->dev);
2593 if (idev)
2594 mtu = idev->cnf.mtu6;
2595 rcu_read_unlock();
2596
30f78d8e 2597out:
14972cbd
RP
2598 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2599
2600 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2601}
2602
901731b8
DA
2603/* MTU selection:
2604 * 1. mtu on route is locked - use it
2605 * 2. mtu from nexthop exception
2606 * 3. mtu from egress device
2607 *
2608 * based on ip6_dst_mtu_forward and exception logic of
2609 * rt6_find_cached_rt; called with rcu_read_lock
2610 */
2611u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
2612 struct in6_addr *saddr)
2613{
2614 struct rt6_exception_bucket *bucket;
2615 struct rt6_exception *rt6_ex;
2616 struct in6_addr *src_key;
2617 struct inet6_dev *idev;
2618 u32 mtu = 0;
2619
2620 if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
2621 mtu = f6i->fib6_pmtu;
2622 if (mtu)
2623 goto out;
2624 }
2625
2626 src_key = NULL;
2627#ifdef CONFIG_IPV6_SUBTREES
2628 if (f6i->fib6_src.plen)
2629 src_key = saddr;
2630#endif
2631
2632 bucket = rcu_dereference(f6i->rt6i_exception_bucket);
2633 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
2634 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
2635 mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
2636
2637 if (likely(!mtu)) {
2638 struct net_device *dev = fib6_info_nh_dev(f6i);
2639
2640 mtu = IPV6_MIN_MTU;
2641 idev = __in6_dev_get(dev);
2642 if (idev && idev->cnf.mtu6 > mtu)
2643 mtu = idev->cnf.mtu6;
2644 }
2645
2646 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2647out:
2648 return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
2649}
2650
3b00944c 2651struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2652 struct flowi6 *fl6)
1da177e4 2653{
87a11578 2654 struct dst_entry *dst;
1da177e4
LT
2655 struct rt6_info *rt;
2656 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2657 struct net *net = dev_net(dev);
1da177e4 2658
38308473 2659 if (unlikely(!idev))
122bdf67 2660 return ERR_PTR(-ENODEV);
1da177e4 2661
ad706862 2662 rt = ip6_dst_alloc(net, dev, 0);
38308473 2663 if (unlikely(!rt)) {
1da177e4 2664 in6_dev_put(idev);
87a11578 2665 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2666 goto out;
2667 }
2668
8e2ec639 2669 rt->dst.flags |= DST_HOST;
588753f1 2670 rt->dst.input = ip6_input;
8e2ec639 2671 rt->dst.output = ip6_output;
550bab42 2672 rt->rt6i_gateway = fl6->daddr;
87a11578 2673 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2674 rt->rt6i_dst.plen = 128;
2675 rt->rt6i_idev = idev;
14edd87d 2676 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2677
4c981e28 2678 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2679 * do proper release of the net_device
2680 */
2681 rt6_uncached_list_add(rt);
81eb8447 2682 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2683
87a11578
DM
2684 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2685
1da177e4 2686out:
87a11578 2687 return dst;
1da177e4
LT
2688}
2689
569d3645 2690static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2691{
86393e52 2692 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2693 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2694 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2695 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2696 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2697 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2698 int entries;
7019b78e 2699
fc66f95c 2700 entries = dst_entries_get_fast(ops);
49a18d86 2701 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2702 entries <= rt_max_size)
1da177e4
LT
2703 goto out;
2704
6891a346 2705 net->ipv6.ip6_rt_gc_expire++;
14956643 2706 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2707 entries = dst_entries_get_slow(ops);
2708 if (entries < ops->gc_thresh)
7019b78e 2709 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2710out:
7019b78e 2711 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2712 return entries > rt_max_size;
1da177e4
LT
2713}
2714
8c14586f
DA
2715static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2716 struct fib6_config *cfg,
f4797b33
DA
2717 const struct in6_addr *gw_addr,
2718 u32 tbid, int flags)
8c14586f
DA
2719{
2720 struct flowi6 fl6 = {
2721 .flowi6_oif = cfg->fc_ifindex,
2722 .daddr = *gw_addr,
2723 .saddr = cfg->fc_prefsrc,
2724 };
2725 struct fib6_table *table;
2726 struct rt6_info *rt;
8c14586f 2727
f4797b33 2728 table = fib6_get_table(net, tbid);
8c14586f
DA
2729 if (!table)
2730 return NULL;
2731
2732 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2733 flags |= RT6_LOOKUP_F_HAS_SADDR;
2734
f4797b33 2735 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
b75cc8f9 2736 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
8c14586f
DA
2737
2738 /* if table lookup failed, fall back to full lookup */
2739 if (rt == net->ipv6.ip6_null_entry) {
2740 ip6_rt_put(rt);
2741 rt = NULL;
2742 }
2743
2744 return rt;
2745}
2746
fc1e64e1
DA
2747static int ip6_route_check_nh_onlink(struct net *net,
2748 struct fib6_config *cfg,
9fbb704c 2749 const struct net_device *dev,
fc1e64e1
DA
2750 struct netlink_ext_ack *extack)
2751{
44750f84 2752 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
fc1e64e1
DA
2753 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2754 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
34bb9431 2755 struct fib6_info *from;
fc1e64e1
DA
2756 struct rt6_info *grt;
2757 int err;
2758
2759 err = 0;
2760 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2761 if (grt) {
34bb9431
PA
2762 rcu_read_lock();
2763 from = rcu_dereference(grt->from);
58e354c0 2764 if (!grt->dst.error &&
4ed591c8 2765 /* ignore match if it is the default route */
34bb9431 2766 from && !ipv6_addr_any(&from->fib6_dst.addr) &&
58e354c0 2767 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
44750f84
DA
2768 NL_SET_ERR_MSG(extack,
2769 "Nexthop has invalid gateway or device mismatch");
fc1e64e1
DA
2770 err = -EINVAL;
2771 }
34bb9431 2772 rcu_read_unlock();
fc1e64e1
DA
2773
2774 ip6_rt_put(grt);
2775 }
2776
2777 return err;
2778}
2779
1edce99f
DA
2780static int ip6_route_check_nh(struct net *net,
2781 struct fib6_config *cfg,
2782 struct net_device **_dev,
2783 struct inet6_dev **idev)
2784{
2785 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2786 struct net_device *dev = _dev ? *_dev : NULL;
2787 struct rt6_info *grt = NULL;
2788 int err = -EHOSTUNREACH;
2789
2790 if (cfg->fc_table) {
f4797b33
DA
2791 int flags = RT6_LOOKUP_F_IFACE;
2792
2793 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2794 cfg->fc_table, flags);
1edce99f
DA
2795 if (grt) {
2796 if (grt->rt6i_flags & RTF_GATEWAY ||
2797 (dev && dev != grt->dst.dev)) {
2798 ip6_rt_put(grt);
2799 grt = NULL;
2800 }
2801 }
2802 }
2803
2804 if (!grt)
b75cc8f9 2805 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
1edce99f
DA
2806
2807 if (!grt)
2808 goto out;
2809
2810 if (dev) {
2811 if (dev != grt->dst.dev) {
2812 ip6_rt_put(grt);
2813 goto out;
2814 }
2815 } else {
2816 *_dev = dev = grt->dst.dev;
2817 *idev = grt->rt6i_idev;
2818 dev_hold(dev);
2819 in6_dev_hold(grt->rt6i_idev);
2820 }
2821
2822 if (!(grt->rt6i_flags & RTF_GATEWAY))
2823 err = 0;
2824
2825 ip6_rt_put(grt);
2826
2827out:
2828 return err;
2829}
2830
9fbb704c
DA
2831static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2832 struct net_device **_dev, struct inet6_dev **idev,
2833 struct netlink_ext_ack *extack)
2834{
2835 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2836 int gwa_type = ipv6_addr_type(gw_addr);
232378e8 2837 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
9fbb704c 2838 const struct net_device *dev = *_dev;
232378e8 2839 bool need_addr_check = !dev;
9fbb704c
DA
2840 int err = -EINVAL;
2841
2842 /* if gw_addr is local we will fail to detect this in case
2843 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2844 * will return already-added prefix route via interface that
2845 * prefix route was assigned to, which might be non-loopback.
2846 */
232378e8
DA
2847 if (dev &&
2848 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2849 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
9fbb704c
DA
2850 goto out;
2851 }
2852
2853 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2854 /* IPv6 strictly inhibits using not link-local
2855 * addresses as nexthop address.
2856 * Otherwise, router will not able to send redirects.
2857 * It is very good, but in some (rare!) circumstances
2858 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2859 * some exceptions. --ANK
2860 * We allow IPv4-mapped nexthops to support RFC4798-type
2861 * addressing
2862 */
2863 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2864 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2865 goto out;
2866 }
2867
2868 if (cfg->fc_flags & RTNH_F_ONLINK)
2869 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2870 else
2871 err = ip6_route_check_nh(net, cfg, _dev, idev);
2872
2873 if (err)
2874 goto out;
2875 }
2876
2877 /* reload in case device was changed */
2878 dev = *_dev;
2879
2880 err = -EINVAL;
2881 if (!dev) {
2882 NL_SET_ERR_MSG(extack, "Egress device not specified");
2883 goto out;
2884 } else if (dev->flags & IFF_LOOPBACK) {
2885 NL_SET_ERR_MSG(extack,
2886 "Egress device can not be loopback device for this route");
2887 goto out;
2888 }
232378e8
DA
2889
2890 /* if we did not check gw_addr above, do so now that the
2891 * egress device has been resolved.
2892 */
2893 if (need_addr_check &&
2894 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2895 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2896 goto out;
2897 }
2898
9fbb704c
DA
2899 err = 0;
2900out:
2901 return err;
2902}
2903
8d1c802b 2904static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
acb54e3c 2905 gfp_t gfp_flags,
333c4301 2906 struct netlink_ext_ack *extack)
1da177e4 2907{
5578689a 2908 struct net *net = cfg->fc_nlinfo.nl_net;
8d1c802b 2909 struct fib6_info *rt = NULL;
1da177e4
LT
2910 struct net_device *dev = NULL;
2911 struct inet6_dev *idev = NULL;
c71099ac 2912 struct fib6_table *table;
1da177e4 2913 int addr_type;
8c5b83f0 2914 int err = -EINVAL;
1da177e4 2915
557c44be 2916 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
2917 if (cfg->fc_flags & RTF_PCPU) {
2918 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 2919 goto out;
d5d531cb 2920 }
557c44be 2921
2ea2352e
WW
2922 /* RTF_CACHE is an internal flag; can not be set by userspace */
2923 if (cfg->fc_flags & RTF_CACHE) {
2924 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2925 goto out;
2926 }
2927
e8478e80
DA
2928 if (cfg->fc_type > RTN_MAX) {
2929 NL_SET_ERR_MSG(extack, "Invalid route type");
2930 goto out;
2931 }
2932
d5d531cb
DA
2933 if (cfg->fc_dst_len > 128) {
2934 NL_SET_ERR_MSG(extack, "Invalid prefix length");
2935 goto out;
2936 }
2937 if (cfg->fc_src_len > 128) {
2938 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 2939 goto out;
d5d531cb 2940 }
1da177e4 2941#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
2942 if (cfg->fc_src_len) {
2943 NL_SET_ERR_MSG(extack,
2944 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 2945 goto out;
d5d531cb 2946 }
1da177e4 2947#endif
86872cb5 2948 if (cfg->fc_ifindex) {
1da177e4 2949 err = -ENODEV;
5578689a 2950 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
2951 if (!dev)
2952 goto out;
2953 idev = in6_dev_get(dev);
2954 if (!idev)
2955 goto out;
2956 }
2957
86872cb5
TG
2958 if (cfg->fc_metric == 0)
2959 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 2960
fc1e64e1
DA
2961 if (cfg->fc_flags & RTNH_F_ONLINK) {
2962 if (!dev) {
2963 NL_SET_ERR_MSG(extack,
2964 "Nexthop device required for onlink");
2965 err = -ENODEV;
2966 goto out;
2967 }
2968
2969 if (!(dev->flags & IFF_UP)) {
2970 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2971 err = -ENETDOWN;
2972 goto out;
2973 }
2974 }
2975
d71314b4 2976 err = -ENOBUFS;
38308473
DM
2977 if (cfg->fc_nlinfo.nlh &&
2978 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 2979 table = fib6_get_table(net, cfg->fc_table);
38308473 2980 if (!table) {
f3213831 2981 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
2982 table = fib6_new_table(net, cfg->fc_table);
2983 }
2984 } else {
2985 table = fib6_new_table(net, cfg->fc_table);
2986 }
38308473
DM
2987
2988 if (!table)
c71099ac 2989 goto out;
c71099ac 2990
93531c67
DA
2991 err = -ENOMEM;
2992 rt = fib6_info_alloc(gfp_flags);
2993 if (!rt)
1da177e4 2994 goto out;
93531c67 2995
767a2217
DA
2996 rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len);
2997 if (IS_ERR(rt->fib6_metrics)) {
2998 err = PTR_ERR(rt->fib6_metrics);
fda21d46
ED
2999 /* Do not leave garbage there. */
3000 rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
767a2217
DA
3001 goto out;
3002 }
3003
93531c67
DA
3004 if (cfg->fc_flags & RTF_ADDRCONF)
3005 rt->dst_nocount = true;
1da177e4 3006
1716a961 3007 if (cfg->fc_flags & RTF_EXPIRES)
14895687 3008 fib6_set_expires(rt, jiffies +
1716a961
G
3009 clock_t_to_jiffies(cfg->fc_expires));
3010 else
14895687 3011 fib6_clean_expires(rt);
1da177e4 3012
86872cb5
TG
3013 if (cfg->fc_protocol == RTPROT_UNSPEC)
3014 cfg->fc_protocol = RTPROT_BOOT;
93c2fb25 3015 rt->fib6_protocol = cfg->fc_protocol;
86872cb5
TG
3016
3017 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4 3018
19e42e45
RP
3019 if (cfg->fc_encap) {
3020 struct lwtunnel_state *lwtstate;
3021
30357d7d 3022 err = lwtunnel_build_state(cfg->fc_encap_type,
127eb7cd 3023 cfg->fc_encap, AF_INET6, cfg,
9ae28727 3024 &lwtstate, extack);
19e42e45
RP
3025 if (err)
3026 goto out;
5e670d84 3027 rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
19e42e45
RP
3028 }
3029
93c2fb25
DA
3030 ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3031 rt->fib6_dst.plen = cfg->fc_dst_len;
3032 if (rt->fib6_dst.plen == 128)
3b6761d1 3033 rt->dst_host = true;
e5fd387a 3034
1da177e4 3035#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
3036 ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3037 rt->fib6_src.plen = cfg->fc_src_len;
1da177e4
LT
3038#endif
3039
93c2fb25 3040 rt->fib6_metric = cfg->fc_metric;
5e670d84 3041 rt->fib6_nh.nh_weight = 1;
1da177e4 3042
e8478e80 3043 rt->fib6_type = cfg->fc_type;
1da177e4
LT
3044
3045 /* We cannot add true routes via loopback here,
3046 they would result in kernel looping; promote them to reject routes
3047 */
86872cb5 3048 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
3049 (dev && (dev->flags & IFF_LOOPBACK) &&
3050 !(addr_type & IPV6_ADDR_LOOPBACK) &&
3051 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 3052 /* hold loopback dev/idev if we haven't done so. */
5578689a 3053 if (dev != net->loopback_dev) {
1da177e4
LT
3054 if (dev) {
3055 dev_put(dev);
3056 in6_dev_put(idev);
3057 }
5578689a 3058 dev = net->loopback_dev;
1da177e4
LT
3059 dev_hold(dev);
3060 idev = in6_dev_get(dev);
3061 if (!idev) {
3062 err = -ENODEV;
3063 goto out;
3064 }
3065 }
93c2fb25 3066 rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP;
1da177e4
LT
3067 goto install_route;
3068 }
3069
86872cb5 3070 if (cfg->fc_flags & RTF_GATEWAY) {
9fbb704c
DA
3071 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3072 if (err)
48ed7b26 3073 goto out;
1da177e4 3074
93531c67 3075 rt->fib6_nh.nh_gw = cfg->fc_gateway;
1da177e4
LT
3076 }
3077
3078 err = -ENODEV;
38308473 3079 if (!dev)
1da177e4
LT
3080 goto out;
3081
428604fb
LB
3082 if (idev->cnf.disable_ipv6) {
3083 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3084 err = -EACCES;
3085 goto out;
3086 }
3087
955ec4cb
DA
3088 if (!(dev->flags & IFF_UP)) {
3089 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3090 err = -ENETDOWN;
3091 goto out;
3092 }
3093
c3968a85
DW
3094 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3095 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 3096 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
3097 err = -EINVAL;
3098 goto out;
3099 }
93c2fb25
DA
3100 rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3101 rt->fib6_prefsrc.plen = 128;
c3968a85 3102 } else
93c2fb25 3103 rt->fib6_prefsrc.plen = 0;
c3968a85 3104
93c2fb25 3105 rt->fib6_flags = cfg->fc_flags;
1da177e4
LT
3106
3107install_route:
93c2fb25 3108 if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
5609b80a 3109 !netif_carrier_ok(dev))
5e670d84
DA
3110 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3111 rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
93531c67 3112 rt->fib6_nh.nh_dev = dev;
93c2fb25 3113 rt->fib6_table = table;
63152fc0 3114
dcd1f572
DA
3115 if (idev)
3116 in6_dev_put(idev);
3117
8c5b83f0 3118 return rt;
6b9ea5a6
RP
3119out:
3120 if (dev)
3121 dev_put(dev);
3122 if (idev)
3123 in6_dev_put(idev);
6b9ea5a6 3124
93531c67 3125 fib6_info_release(rt);
8c5b83f0 3126 return ERR_PTR(err);
6b9ea5a6
RP
3127}
3128
acb54e3c 3129int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
333c4301 3130 struct netlink_ext_ack *extack)
6b9ea5a6 3131{
8d1c802b 3132 struct fib6_info *rt;
6b9ea5a6
RP
3133 int err;
3134
acb54e3c 3135 rt = ip6_route_info_create(cfg, gfp_flags, extack);
d4ead6b3
DA
3136 if (IS_ERR(rt))
3137 return PTR_ERR(rt);
6b9ea5a6 3138
d4ead6b3 3139 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
93531c67 3140 fib6_info_release(rt);
6b9ea5a6 3141
1da177e4
LT
3142 return err;
3143}
3144
8d1c802b 3145static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
1da177e4 3146{
afb1d4b5 3147 struct net *net = info->nl_net;
c71099ac 3148 struct fib6_table *table;
afb1d4b5 3149 int err;
1da177e4 3150
421842ed 3151 if (rt == net->ipv6.fib6_null_entry) {
6825a26c
G
3152 err = -ENOENT;
3153 goto out;
3154 }
6c813a72 3155
93c2fb25 3156 table = rt->fib6_table;
66f5d6ce 3157 spin_lock_bh(&table->tb6_lock);
86872cb5 3158 err = fib6_del(rt, info);
66f5d6ce 3159 spin_unlock_bh(&table->tb6_lock);
1da177e4 3160
6825a26c 3161out:
93531c67 3162 fib6_info_release(rt);
1da177e4
LT
3163 return err;
3164}
3165
8d1c802b 3166int ip6_del_rt(struct net *net, struct fib6_info *rt)
e0a1ad73 3167{
afb1d4b5
DA
3168 struct nl_info info = { .nl_net = net };
3169
528c4ceb 3170 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
3171}
3172
8d1c802b 3173static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
0ae81335
DA
3174{
3175 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 3176 struct net *net = info->nl_net;
16a16cd3 3177 struct sk_buff *skb = NULL;
0ae81335 3178 struct fib6_table *table;
e3330039 3179 int err = -ENOENT;
0ae81335 3180
421842ed 3181 if (rt == net->ipv6.fib6_null_entry)
e3330039 3182 goto out_put;
93c2fb25 3183 table = rt->fib6_table;
66f5d6ce 3184 spin_lock_bh(&table->tb6_lock);
0ae81335 3185
93c2fb25 3186 if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
8d1c802b 3187 struct fib6_info *sibling, *next_sibling;
0ae81335 3188
16a16cd3
DA
3189 /* prefer to send a single notification with all hops */
3190 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3191 if (skb) {
3192 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3193
d4ead6b3 3194 if (rt6_fill_node(net, skb, rt, NULL,
16a16cd3
DA
3195 NULL, NULL, 0, RTM_DELROUTE,
3196 info->portid, seq, 0) < 0) {
3197 kfree_skb(skb);
3198 skb = NULL;
3199 } else
3200 info->skip_notify = 1;
3201 }
3202
0ae81335 3203 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25
DA
3204 &rt->fib6_siblings,
3205 fib6_siblings) {
0ae81335
DA
3206 err = fib6_del(sibling, info);
3207 if (err)
e3330039 3208 goto out_unlock;
0ae81335
DA
3209 }
3210 }
3211
3212 err = fib6_del(rt, info);
e3330039 3213out_unlock:
66f5d6ce 3214 spin_unlock_bh(&table->tb6_lock);
e3330039 3215out_put:
93531c67 3216 fib6_info_release(rt);
16a16cd3
DA
3217
3218 if (skb) {
e3330039 3219 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
3220 info->nlh, gfp_any());
3221 }
0ae81335
DA
3222 return err;
3223}
3224
23fb93a4
DA
3225static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3226{
3227 int rc = -ESRCH;
3228
3229 if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3230 goto out;
3231
3232 if (cfg->fc_flags & RTF_GATEWAY &&
3233 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3234 goto out;
761f6026
XL
3235
3236 rc = rt6_remove_exception_rt(rt);
23fb93a4
DA
3237out:
3238 return rc;
3239}
3240
333c4301
DA
3241static int ip6_route_del(struct fib6_config *cfg,
3242 struct netlink_ext_ack *extack)
1da177e4 3243{
8d1c802b 3244 struct rt6_info *rt_cache;
c71099ac 3245 struct fib6_table *table;
8d1c802b 3246 struct fib6_info *rt;
1da177e4 3247 struct fib6_node *fn;
1da177e4
LT
3248 int err = -ESRCH;
3249
5578689a 3250 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
3251 if (!table) {
3252 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 3253 return err;
d5d531cb 3254 }
c71099ac 3255
66f5d6ce 3256 rcu_read_lock();
1da177e4 3257
c71099ac 3258 fn = fib6_locate(&table->tb6_root,
86872cb5 3259 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 3260 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 3261 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 3262
1da177e4 3263 if (fn) {
66f5d6ce 3264 for_each_fib6_node_rt_rcu(fn) {
2b760fcf 3265 if (cfg->fc_flags & RTF_CACHE) {
23fb93a4
DA
3266 int rc;
3267
2b760fcf
WW
3268 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3269 &cfg->fc_src);
23fb93a4
DA
3270 if (rt_cache) {
3271 rc = ip6_del_cached_rt(rt_cache, cfg);
9e575010
ED
3272 if (rc != -ESRCH) {
3273 rcu_read_unlock();
23fb93a4 3274 return rc;
9e575010 3275 }
23fb93a4
DA
3276 }
3277 continue;
2b760fcf 3278 }
86872cb5 3279 if (cfg->fc_ifindex &&
5e670d84
DA
3280 (!rt->fib6_nh.nh_dev ||
3281 rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
1da177e4 3282 continue;
86872cb5 3283 if (cfg->fc_flags & RTF_GATEWAY &&
5e670d84 3284 !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
1da177e4 3285 continue;
93c2fb25 3286 if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
1da177e4 3287 continue;
93c2fb25 3288 if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
c2ed1880 3289 continue;
e873e4b9
WW
3290 if (!fib6_info_hold_safe(rt))
3291 continue;
66f5d6ce 3292 rcu_read_unlock();
1da177e4 3293
0ae81335
DA
3294 /* if gateway was specified only delete the one hop */
3295 if (cfg->fc_flags & RTF_GATEWAY)
3296 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3297
3298 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3299 }
3300 }
66f5d6ce 3301 rcu_read_unlock();
1da177e4
LT
3302
3303 return err;
3304}
3305
6700c270 3306static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 3307{
a6279458 3308 struct netevent_redirect netevent;
e8599ff4 3309 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
3310 struct ndisc_options ndopts;
3311 struct inet6_dev *in6_dev;
3312 struct neighbour *neigh;
a68886a6 3313 struct fib6_info *from;
71bcdba0 3314 struct rd_msg *msg;
6e157b6a
DM
3315 int optlen, on_link;
3316 u8 *lladdr;
e8599ff4 3317
29a3cad5 3318 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 3319 optlen -= sizeof(*msg);
e8599ff4
DM
3320
3321 if (optlen < 0) {
6e157b6a 3322 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
3323 return;
3324 }
3325
71bcdba0 3326 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 3327
71bcdba0 3328 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 3329 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
3330 return;
3331 }
3332
6e157b6a 3333 on_link = 0;
71bcdba0 3334 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 3335 on_link = 1;
71bcdba0 3336 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 3337 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 3338 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
3339 return;
3340 }
3341
3342 in6_dev = __in6_dev_get(skb->dev);
3343 if (!in6_dev)
3344 return;
3345 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3346 return;
3347
3348 /* RFC2461 8.1:
3349 * The IP source address of the Redirect MUST be the same as the current
3350 * first-hop router for the specified ICMP Destination Address.
3351 */
3352
f997c55c 3353 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
3354 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3355 return;
3356 }
6e157b6a
DM
3357
3358 lladdr = NULL;
e8599ff4
DM
3359 if (ndopts.nd_opts_tgt_lladdr) {
3360 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3361 skb->dev);
3362 if (!lladdr) {
3363 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3364 return;
3365 }
3366 }
3367
6e157b6a 3368 rt = (struct rt6_info *) dst;
ec13ad1d 3369 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 3370 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 3371 return;
6e157b6a 3372 }
e8599ff4 3373
6e157b6a
DM
3374 /* Redirect received -> path was valid.
3375 * Look, redirects are sent only in response to data packets,
3376 * so that this nexthop apparently is reachable. --ANK
3377 */
0dec879f 3378 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 3379
71bcdba0 3380 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
3381 if (!neigh)
3382 return;
a6279458 3383
1da177e4
LT
3384 /*
3385 * We have finally decided to accept it.
3386 */
3387
f997c55c 3388 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3389 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3390 NEIGH_UPDATE_F_OVERRIDE|
3391 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3392 NEIGH_UPDATE_F_ISROUTER)),
3393 NDISC_REDIRECT, &ndopts);
1da177e4 3394
4d85cd0c 3395 rcu_read_lock();
a68886a6 3396 from = rcu_dereference(rt->from);
e873e4b9
WW
3397 /* This fib6_info_hold() is safe here because we hold reference to rt
3398 * and rt already holds reference to fib6_info.
3399 */
8a14e46f 3400 fib6_info_hold(from);
4d85cd0c 3401 rcu_read_unlock();
8a14e46f
DA
3402
3403 nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
38308473 3404 if (!nrt)
1da177e4
LT
3405 goto out;
3406
3407 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3408 if (on_link)
3409 nrt->rt6i_flags &= ~RTF_GATEWAY;
3410
4e3fd7a0 3411 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3412
2b760fcf
WW
3413 /* No need to remove rt from the exception table if rt is
3414 * a cached route because rt6_insert_exception() will
3415 * takes care of it
3416 */
8a14e46f 3417 if (rt6_insert_exception(nrt, from)) {
2b760fcf
WW
3418 dst_release_immediate(&nrt->dst);
3419 goto out;
3420 }
1da177e4 3421
d8d1f30b
CG
3422 netevent.old = &rt->dst;
3423 netevent.new = &nrt->dst;
71bcdba0 3424 netevent.daddr = &msg->dest;
60592833 3425 netevent.neigh = neigh;
8d71740c
TT
3426 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3427
1da177e4 3428out:
8a14e46f 3429 fib6_info_release(from);
e8599ff4 3430 neigh_release(neigh);
6e157b6a
DM
3431}
3432
70ceb4f5 3433#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 3434static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 3435 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3436 const struct in6_addr *gwaddr,
3437 struct net_device *dev)
70ceb4f5 3438{
830218c1
DA
3439 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3440 int ifindex = dev->ifindex;
70ceb4f5 3441 struct fib6_node *fn;
8d1c802b 3442 struct fib6_info *rt = NULL;
c71099ac
TG
3443 struct fib6_table *table;
3444
830218c1 3445 table = fib6_get_table(net, tb_id);
38308473 3446 if (!table)
c71099ac 3447 return NULL;
70ceb4f5 3448
66f5d6ce 3449 rcu_read_lock();
38fbeeee 3450 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3451 if (!fn)
3452 goto out;
3453
66f5d6ce 3454 for_each_fib6_node_rt_rcu(fn) {
5e670d84 3455 if (rt->fib6_nh.nh_dev->ifindex != ifindex)
70ceb4f5 3456 continue;
93c2fb25 3457 if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
70ceb4f5 3458 continue;
5e670d84 3459 if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
70ceb4f5 3460 continue;
e873e4b9
WW
3461 if (!fib6_info_hold_safe(rt))
3462 continue;
70ceb4f5
YH
3463 break;
3464 }
3465out:
66f5d6ce 3466 rcu_read_unlock();
70ceb4f5
YH
3467 return rt;
3468}
3469
8d1c802b 3470static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 3471 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3472 const struct in6_addr *gwaddr,
3473 struct net_device *dev,
95c96174 3474 unsigned int pref)
70ceb4f5 3475{
86872cb5 3476 struct fib6_config cfg = {
238fc7ea 3477 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3478 .fc_ifindex = dev->ifindex,
86872cb5
TG
3479 .fc_dst_len = prefixlen,
3480 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3481 RTF_UP | RTF_PREF(pref),
b91d5329 3482 .fc_protocol = RTPROT_RA,
e8478e80 3483 .fc_type = RTN_UNICAST,
15e47304 3484 .fc_nlinfo.portid = 0,
efa2cea0
DL
3485 .fc_nlinfo.nlh = NULL,
3486 .fc_nlinfo.nl_net = net,
86872cb5
TG
3487 };
3488
830218c1 3489 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3490 cfg.fc_dst = *prefix;
3491 cfg.fc_gateway = *gwaddr;
70ceb4f5 3492
e317da96
YH
3493 /* We should treat it as a default route if prefix length is 0. */
3494 if (!prefixlen)
86872cb5 3495 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3496
acb54e3c 3497 ip6_route_add(&cfg, GFP_ATOMIC, NULL);
70ceb4f5 3498
830218c1 3499 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3500}
3501#endif
3502
8d1c802b 3503struct fib6_info *rt6_get_dflt_router(struct net *net,
afb1d4b5
DA
3504 const struct in6_addr *addr,
3505 struct net_device *dev)
1ab1457c 3506{
830218c1 3507 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
8d1c802b 3508 struct fib6_info *rt;
c71099ac 3509 struct fib6_table *table;
1da177e4 3510
afb1d4b5 3511 table = fib6_get_table(net, tb_id);
38308473 3512 if (!table)
c71099ac 3513 return NULL;
1da177e4 3514
66f5d6ce
WW
3515 rcu_read_lock();
3516 for_each_fib6_node_rt_rcu(&table->tb6_root) {
5e670d84 3517 if (dev == rt->fib6_nh.nh_dev &&
93c2fb25 3518 ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
5e670d84 3519 ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
1da177e4
LT
3520 break;
3521 }
e873e4b9
WW
3522 if (rt && !fib6_info_hold_safe(rt))
3523 rt = NULL;
66f5d6ce 3524 rcu_read_unlock();
1da177e4
LT
3525 return rt;
3526}
3527
8d1c802b 3528struct fib6_info *rt6_add_dflt_router(struct net *net,
afb1d4b5 3529 const struct in6_addr *gwaddr,
ebacaaa0
YH
3530 struct net_device *dev,
3531 unsigned int pref)
1da177e4 3532{
86872cb5 3533 struct fib6_config cfg = {
ca254490 3534 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3535 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3536 .fc_ifindex = dev->ifindex,
3537 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3538 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3539 .fc_protocol = RTPROT_RA,
e8478e80 3540 .fc_type = RTN_UNICAST,
15e47304 3541 .fc_nlinfo.portid = 0,
5578689a 3542 .fc_nlinfo.nlh = NULL,
afb1d4b5 3543 .fc_nlinfo.nl_net = net,
86872cb5 3544 };
1da177e4 3545
4e3fd7a0 3546 cfg.fc_gateway = *gwaddr;
1da177e4 3547
acb54e3c 3548 if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
830218c1
DA
3549 struct fib6_table *table;
3550
3551 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3552 if (table)
3553 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3554 }
1da177e4 3555
afb1d4b5 3556 return rt6_get_dflt_router(net, gwaddr, dev);
1da177e4
LT
3557}
3558
afb1d4b5
DA
3559static void __rt6_purge_dflt_routers(struct net *net,
3560 struct fib6_table *table)
1da177e4 3561{
8d1c802b 3562 struct fib6_info *rt;
1da177e4
LT
3563
3564restart:
66f5d6ce
WW
3565 rcu_read_lock();
3566 for_each_fib6_node_rt_rcu(&table->tb6_root) {
dcd1f572
DA
3567 struct net_device *dev = fib6_info_nh_dev(rt);
3568 struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3569
93c2fb25 3570 if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
e873e4b9
WW
3571 (!idev || idev->cnf.accept_ra != 2) &&
3572 fib6_info_hold_safe(rt)) {
93531c67
DA
3573 rcu_read_unlock();
3574 ip6_del_rt(net, rt);
1da177e4
LT
3575 goto restart;
3576 }
3577 }
66f5d6ce 3578 rcu_read_unlock();
830218c1
DA
3579
3580 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3581}
3582
3583void rt6_purge_dflt_routers(struct net *net)
3584{
3585 struct fib6_table *table;
3586 struct hlist_head *head;
3587 unsigned int h;
3588
3589 rcu_read_lock();
3590
3591 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3592 head = &net->ipv6.fib_table_hash[h];
3593 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3594 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
afb1d4b5 3595 __rt6_purge_dflt_routers(net, table);
830218c1
DA
3596 }
3597 }
3598
3599 rcu_read_unlock();
1da177e4
LT
3600}
3601
5578689a
DL
3602static void rtmsg_to_fib6_config(struct net *net,
3603 struct in6_rtmsg *rtmsg,
86872cb5
TG
3604 struct fib6_config *cfg)
3605{
8823a3ac
3606 *cfg = (struct fib6_config){
3607 .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3608 : RT6_TABLE_MAIN,
3609 .fc_ifindex = rtmsg->rtmsg_ifindex,
3610 .fc_metric = rtmsg->rtmsg_metric,
3611 .fc_expires = rtmsg->rtmsg_info,
3612 .fc_dst_len = rtmsg->rtmsg_dst_len,
3613 .fc_src_len = rtmsg->rtmsg_src_len,
3614 .fc_flags = rtmsg->rtmsg_flags,
3615 .fc_type = rtmsg->rtmsg_type,
3616
3617 .fc_nlinfo.nl_net = net,
3618
3619 .fc_dst = rtmsg->rtmsg_dst,
3620 .fc_src = rtmsg->rtmsg_src,
3621 .fc_gateway = rtmsg->rtmsg_gateway,
3622 };
86872cb5
TG
3623}
3624
5578689a 3625int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3626{
86872cb5 3627 struct fib6_config cfg;
1da177e4
LT
3628 struct in6_rtmsg rtmsg;
3629 int err;
3630
67ba4152 3631 switch (cmd) {
1da177e4
LT
3632 case SIOCADDRT: /* Add a route */
3633 case SIOCDELRT: /* Delete a route */
af31f412 3634 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3635 return -EPERM;
3636 err = copy_from_user(&rtmsg, arg,
3637 sizeof(struct in6_rtmsg));
3638 if (err)
3639 return -EFAULT;
86872cb5 3640
5578689a 3641 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3642
1da177e4
LT
3643 rtnl_lock();
3644 switch (cmd) {
3645 case SIOCADDRT:
acb54e3c 3646 err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
1da177e4
LT
3647 break;
3648 case SIOCDELRT:
333c4301 3649 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3650 break;
3651 default:
3652 err = -EINVAL;
3653 }
3654 rtnl_unlock();
3655
3656 return err;
3ff50b79 3657 }
1da177e4
LT
3658
3659 return -EINVAL;
3660}
3661
3662/*
3663 * Drop the packet on the floor
3664 */
3665
d5fdd6ba 3666static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3667{
612f09e8 3668 int type;
adf30907 3669 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3670 switch (ipstats_mib_noroutes) {
3671 case IPSTATS_MIB_INNOROUTES:
0660e03f 3672 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3673 if (type == IPV6_ADDR_ANY) {
bdb7cc64
SS
3674 IP6_INC_STATS(dev_net(dst->dev),
3675 __in6_dev_get_safely(skb->dev),
3bd653c8 3676 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3677 break;
3678 }
3679 /* FALLTHROUGH */
3680 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3681 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3682 ipstats_mib_noroutes);
612f09e8
YH
3683 break;
3684 }
3ffe533c 3685 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3686 kfree_skb(skb);
3687 return 0;
3688}
3689
9ce8ade0
TG
3690static int ip6_pkt_discard(struct sk_buff *skb)
3691{
612f09e8 3692 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3693}
3694
ede2059d 3695static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3696{
adf30907 3697 skb->dev = skb_dst(skb)->dev;
612f09e8 3698 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3699}
3700
9ce8ade0
TG
3701static int ip6_pkt_prohibit(struct sk_buff *skb)
3702{
612f09e8 3703 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3704}
3705
ede2059d 3706static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3707{
adf30907 3708 skb->dev = skb_dst(skb)->dev;
612f09e8 3709 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3710}
3711
1da177e4
LT
3712/*
3713 * Allocate a dst for local (unicast / anycast) address.
3714 */
3715
360a9887
DA
3716struct fib6_info *addrconf_f6i_alloc(struct net *net,
3717 struct inet6_dev *idev,
3718 const struct in6_addr *addr,
3719 bool anycast, gfp_t gfp_flags)
1da177e4 3720{
ca254490 3721 u32 tb_id;
4832c30d 3722 struct net_device *dev = idev->dev;
360a9887 3723 struct fib6_info *f6i;
5f02ce24 3724
360a9887
DA
3725 f6i = fib6_info_alloc(gfp_flags);
3726 if (!f6i)
1da177e4
LT
3727 return ERR_PTR(-ENOMEM);
3728
767a2217 3729 f6i->fib6_metrics = ip_fib_metrics_init(net, NULL, 0);
360a9887 3730 f6i->dst_nocount = true;
360a9887
DA
3731 f6i->dst_host = true;
3732 f6i->fib6_protocol = RTPROT_KERNEL;
3733 f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP;
e8478e80 3734 if (anycast) {
360a9887
DA
3735 f6i->fib6_type = RTN_ANYCAST;
3736 f6i->fib6_flags |= RTF_ANYCAST;
e8478e80 3737 } else {
360a9887
DA
3738 f6i->fib6_type = RTN_LOCAL;
3739 f6i->fib6_flags |= RTF_LOCAL;
e8478e80 3740 }
1da177e4 3741
360a9887 3742 f6i->fib6_nh.nh_gw = *addr;
93531c67 3743 dev_hold(dev);
360a9887
DA
3744 f6i->fib6_nh.nh_dev = dev;
3745 f6i->fib6_dst.addr = *addr;
3746 f6i->fib6_dst.plen = 128;
ca254490 3747 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
360a9887 3748 f6i->fib6_table = fib6_get_table(net, tb_id);
1da177e4 3749
360a9887 3750 return f6i;
1da177e4
LT
3751}
3752
/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;	/* device filter; NULL matches any device */
	struct net *net;	/* namespace providing fib6_null_entry */
	struct in6_addr *addr;	/* address compared against fib6_prefsrc */
};
3759
8d1c802b 3760static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
c3968a85
DW
3761{
3762 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3763 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3764 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3765
5e670d84 3766 if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
421842ed 3767 rt != net->ipv6.fib6_null_entry &&
93c2fb25 3768 ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
60006a48 3769 spin_lock_bh(&rt6_exception_lock);
c3968a85 3770 /* remove prefsrc entry */
93c2fb25 3771 rt->fib6_prefsrc.plen = 0;
60006a48 3772 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3773 }
3774 return 0;
3775}
3776
3777void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3778{
3779 struct net *net = dev_net(ifp->idev->dev);
3780 struct arg_dev_net_ip adni = {
3781 .dev = ifp->idev->dev,
3782 .net = net,
3783 .addr = &ifp->addr,
3784 };
0c3584d5 3785 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3786}
3787
be7a010d 3788#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
be7a010d
DJ
3789
3790/* Remove routers and update dst entries when gateway turn into host. */
8d1c802b 3791static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
be7a010d
DJ
3792{
3793 struct in6_addr *gateway = (struct in6_addr *)arg;
3794
93c2fb25 3795 if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
5e670d84 3796 ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
be7a010d
DJ
3797 return -1;
3798 }
b16cb459
WW
3799
3800 /* Further clean up cached routes in exception table.
3801 * This is needed because cached route may have a different
3802 * gateway than its 'parent' in the case of an ip redirect.
3803 */
3804 rt6_exceptions_clean_tohost(rt, gateway);
3805
be7a010d
DJ
3806 return 0;
3807}
3808
3809void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3810{
3811 fib6_clean_all(net, fib6_clean_tohost, gateway);
3812}
3813
/* Argument block handed to the fib6_ifup() / fib6_ifdown() walkers. */
struct arg_netdev_event {
	const struct net_device *dev;		/* device the event is about */
	union {
		unsigned int nh_flags;		/* read by fib6_ifup() */
		unsigned long event;		/* read by fib6_ifdown() */
	};
};
3821
8d1c802b 3822static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
d7dedee1 3823{
8d1c802b 3824 struct fib6_info *iter;
d7dedee1
IS
3825 struct fib6_node *fn;
3826
93c2fb25
DA
3827 fn = rcu_dereference_protected(rt->fib6_node,
3828 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3829 iter = rcu_dereference_protected(fn->leaf,
93c2fb25 3830 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3831 while (iter) {
93c2fb25 3832 if (iter->fib6_metric == rt->fib6_metric &&
33bd5ac5 3833 rt6_qualify_for_ecmp(iter))
d7dedee1 3834 return iter;
8fb11a9a 3835 iter = rcu_dereference_protected(iter->fib6_next,
93c2fb25 3836 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1
IS
3837 }
3838
3839 return NULL;
3840}
3841
8d1c802b 3842static bool rt6_is_dead(const struct fib6_info *rt)
d7dedee1 3843{
5e670d84
DA
3844 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
3845 (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
dcd1f572 3846 fib6_ignore_linkdown(rt)))
d7dedee1
IS
3847 return true;
3848
3849 return false;
3850}
3851
8d1c802b 3852static int rt6_multipath_total_weight(const struct fib6_info *rt)
d7dedee1 3853{
8d1c802b 3854 struct fib6_info *iter;
d7dedee1
IS
3855 int total = 0;
3856
3857 if (!rt6_is_dead(rt))
5e670d84 3858 total += rt->fib6_nh.nh_weight;
d7dedee1 3859
93c2fb25 3860 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
d7dedee1 3861 if (!rt6_is_dead(iter))
5e670d84 3862 total += iter->fib6_nh.nh_weight;
d7dedee1
IS
3863 }
3864
3865 return total;
3866}
3867
8d1c802b 3868static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
d7dedee1
IS
3869{
3870 int upper_bound = -1;
3871
3872 if (!rt6_is_dead(rt)) {
5e670d84 3873 *weight += rt->fib6_nh.nh_weight;
d7dedee1
IS
3874 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3875 total) - 1;
3876 }
5e670d84 3877 atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
d7dedee1
IS
3878}
3879
8d1c802b 3880static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
d7dedee1 3881{
8d1c802b 3882 struct fib6_info *iter;
d7dedee1
IS
3883 int weight = 0;
3884
3885 rt6_upper_bound_set(rt, &weight, total);
3886
93c2fb25 3887 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
d7dedee1
IS
3888 rt6_upper_bound_set(iter, &weight, total);
3889}
3890
8d1c802b 3891void rt6_multipath_rebalance(struct fib6_info *rt)
d7dedee1 3892{
8d1c802b 3893 struct fib6_info *first;
d7dedee1
IS
3894 int total;
3895
3896 /* In case the entire multipath route was marked for flushing,
3897 * then there is no need to rebalance upon the removal of every
3898 * sibling route.
3899 */
93c2fb25 3900 if (!rt->fib6_nsiblings || rt->should_flush)
d7dedee1
IS
3901 return;
3902
3903 /* During lookup routes are evaluated in order, so we need to
3904 * make sure upper bounds are assigned from the first sibling
3905 * onwards.
3906 */
3907 first = rt6_multipath_first_sibling(rt);
3908 if (WARN_ON_ONCE(!first))
3909 return;
3910
3911 total = rt6_multipath_total_weight(first);
3912 rt6_multipath_upper_bound_set(first, total);
3913}
3914
8d1c802b 3915static int fib6_ifup(struct fib6_info *rt, void *p_arg)
2127d95a
IS
3916{
3917 const struct arg_netdev_event *arg = p_arg;
7aef6859 3918 struct net *net = dev_net(arg->dev);
2127d95a 3919
421842ed 3920 if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
5e670d84 3921 rt->fib6_nh.nh_flags &= ~arg->nh_flags;
7aef6859 3922 fib6_update_sernum_upto_root(net, rt);
d7dedee1 3923 rt6_multipath_rebalance(rt);
1de178ed 3924 }
2127d95a
IS
3925
3926 return 0;
3927}
3928
3929void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3930{
3931 struct arg_netdev_event arg = {
3932 .dev = dev,
6802f3ad
IS
3933 {
3934 .nh_flags = nh_flags,
3935 },
2127d95a
IS
3936 };
3937
3938 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3939 arg.nh_flags |= RTNH_F_LINKDOWN;
3940
3941 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3942}
3943
8d1c802b 3944static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
1de178ed
IS
3945 const struct net_device *dev)
3946{
8d1c802b 3947 struct fib6_info *iter;
1de178ed 3948
5e670d84 3949 if (rt->fib6_nh.nh_dev == dev)
1de178ed 3950 return true;
93c2fb25 3951 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84 3952 if (iter->fib6_nh.nh_dev == dev)
1de178ed
IS
3953 return true;
3954
3955 return false;
3956}
3957
8d1c802b 3958static void rt6_multipath_flush(struct fib6_info *rt)
1de178ed 3959{
8d1c802b 3960 struct fib6_info *iter;
1de178ed
IS
3961
3962 rt->should_flush = 1;
93c2fb25 3963 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
1de178ed
IS
3964 iter->should_flush = 1;
3965}
3966
8d1c802b 3967static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
1de178ed
IS
3968 const struct net_device *down_dev)
3969{
8d1c802b 3970 struct fib6_info *iter;
1de178ed
IS
3971 unsigned int dead = 0;
3972
5e670d84
DA
3973 if (rt->fib6_nh.nh_dev == down_dev ||
3974 rt->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed 3975 dead++;
93c2fb25 3976 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84
DA
3977 if (iter->fib6_nh.nh_dev == down_dev ||
3978 iter->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed
IS
3979 dead++;
3980
3981 return dead;
3982}
3983
8d1c802b 3984static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
1de178ed
IS
3985 const struct net_device *dev,
3986 unsigned int nh_flags)
3987{
8d1c802b 3988 struct fib6_info *iter;
1de178ed 3989
5e670d84
DA
3990 if (rt->fib6_nh.nh_dev == dev)
3991 rt->fib6_nh.nh_flags |= nh_flags;
93c2fb25 3992 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84
DA
3993 if (iter->fib6_nh.nh_dev == dev)
3994 iter->fib6_nh.nh_flags |= nh_flags;
1de178ed
IS
3995}
3996
a1a22c12 3997/* called with write lock held for table with rt */
8d1c802b 3998static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
1da177e4 3999{
4c981e28
IS
4000 const struct arg_netdev_event *arg = p_arg;
4001 const struct net_device *dev = arg->dev;
7aef6859 4002 struct net *net = dev_net(dev);
8ed67789 4003
421842ed 4004 if (rt == net->ipv6.fib6_null_entry)
27c6fa73
IS
4005 return 0;
4006
4007 switch (arg->event) {
4008 case NETDEV_UNREGISTER:
5e670d84 4009 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
27c6fa73 4010 case NETDEV_DOWN:
1de178ed 4011 if (rt->should_flush)
27c6fa73 4012 return -1;
93c2fb25 4013 if (!rt->fib6_nsiblings)
5e670d84 4014 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
1de178ed
IS
4015 if (rt6_multipath_uses_dev(rt, dev)) {
4016 unsigned int count;
4017
4018 count = rt6_multipath_dead_count(rt, dev);
93c2fb25 4019 if (rt->fib6_nsiblings + 1 == count) {
1de178ed
IS
4020 rt6_multipath_flush(rt);
4021 return -1;
4022 }
4023 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4024 RTNH_F_LINKDOWN);
7aef6859 4025 fib6_update_sernum(net, rt);
d7dedee1 4026 rt6_multipath_rebalance(rt);
1de178ed
IS
4027 }
4028 return -2;
27c6fa73 4029 case NETDEV_CHANGE:
5e670d84 4030 if (rt->fib6_nh.nh_dev != dev ||
93c2fb25 4031 rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73 4032 break;
5e670d84 4033 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 4034 rt6_multipath_rebalance(rt);
27c6fa73 4035 break;
2b241361 4036 }
c159d30c 4037
1da177e4
LT
4038 return 0;
4039}
4040
27c6fa73 4041void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 4042{
4c981e28 4043 struct arg_netdev_event arg = {
8ed67789 4044 .dev = dev,
6802f3ad
IS
4045 {
4046 .event = event,
4047 },
8ed67789 4048 };
7c6bb7d2 4049 struct net *net = dev_net(dev);
8ed67789 4050
7c6bb7d2
DA
4051 if (net->ipv6.sysctl.skip_notify_on_dev_down)
4052 fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
4053 else
4054 fib6_clean_all(net, fib6_ifdown, &arg);
4c981e28
IS
4055}
4056
4057void rt6_disable_ip(struct net_device *dev, unsigned long event)
4058{
4059 rt6_sync_down_dev(dev, event);
4060 rt6_uncached_list_flush_dev(dev_net(dev), dev);
4061 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
4062}
4063
/* Argument block passed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
4068
8d1c802b 4069static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4070{
4071 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4072 struct inet6_dev *idev;
4073
4074 /* In IPv6 pmtu discovery is not optional,
4075 so that RTAX_MTU lock cannot disable it.
4076 We still use this lock to block changes
4077 caused by addrconf/ndisc.
4078 */
4079
4080 idev = __in6_dev_get(arg->dev);
38308473 4081 if (!idev)
1da177e4
LT
4082 return 0;
4083
4084 /* For administrative MTU increase, there is no way to discover
4085 IPv6 PMTU increase, so PMTU increase should be updated here.
4086 Since RFC 1981 doesn't include administrative MTU increase
4087 update PMTU increase is a MUST. (i.e. jumbo frame)
4088 */
5e670d84 4089 if (rt->fib6_nh.nh_dev == arg->dev &&
d4ead6b3
DA
4090 !fib6_metric_locked(rt, RTAX_MTU)) {
4091 u32 mtu = rt->fib6_pmtu;
4092
4093 if (mtu >= arg->mtu ||
4094 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4095 fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4096
f5bbe7ee 4097 spin_lock_bh(&rt6_exception_lock);
e9fa1495 4098 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
f5bbe7ee 4099 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 4100 }
1da177e4
LT
4101 return 0;
4102}
4103
95c96174 4104void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 4105{
c71099ac
TG
4106 struct rt6_mtu_change_arg arg = {
4107 .dev = dev,
4108 .mtu = mtu,
4109 };
1da177e4 4110
0c3584d5 4111 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
4112}
4113
ef7c79ed 4114static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 4115 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
aa8f8778 4116 [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
86872cb5 4117 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 4118 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
4119 [RTA_PRIORITY] = { .type = NLA_U32 },
4120 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 4121 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 4122 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
4123 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4124 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 4125 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 4126 [RTA_UID] = { .type = NLA_U32 },
3b45a410 4127 [RTA_MARK] = { .type = NLA_U32 },
aa8f8778 4128 [RTA_TABLE] = { .type = NLA_U32 },
eacb9384
RP
4129 [RTA_IP_PROTO] = { .type = NLA_U8 },
4130 [RTA_SPORT] = { .type = NLA_U16 },
4131 [RTA_DPORT] = { .type = NLA_U16 },
86872cb5
TG
4132};
4133
4134static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
4135 struct fib6_config *cfg,
4136 struct netlink_ext_ack *extack)
1da177e4 4137{
86872cb5
TG
4138 struct rtmsg *rtm;
4139 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 4140 unsigned int pref;
86872cb5 4141 int err;
1da177e4 4142
fceb6435 4143 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
dac9c979 4144 extack);
86872cb5
TG
4145 if (err < 0)
4146 goto errout;
1da177e4 4147
86872cb5
TG
4148 err = -EINVAL;
4149 rtm = nlmsg_data(nlh);
86872cb5 4150
84db8407
4151 *cfg = (struct fib6_config){
4152 .fc_table = rtm->rtm_table,
4153 .fc_dst_len = rtm->rtm_dst_len,
4154 .fc_src_len = rtm->rtm_src_len,
4155 .fc_flags = RTF_UP,
4156 .fc_protocol = rtm->rtm_protocol,
4157 .fc_type = rtm->rtm_type,
4158
4159 .fc_nlinfo.portid = NETLINK_CB(skb).portid,
4160 .fc_nlinfo.nlh = nlh,
4161 .fc_nlinfo.nl_net = sock_net(skb->sk),
4162 };
86872cb5 4163
ef2c7d7b
ND
4164 if (rtm->rtm_type == RTN_UNREACHABLE ||
4165 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
4166 rtm->rtm_type == RTN_PROHIBIT ||
4167 rtm->rtm_type == RTN_THROW)
86872cb5
TG
4168 cfg->fc_flags |= RTF_REJECT;
4169
ab79ad14
4170 if (rtm->rtm_type == RTN_LOCAL)
4171 cfg->fc_flags |= RTF_LOCAL;
4172
1f56a01f
MKL
4173 if (rtm->rtm_flags & RTM_F_CLONED)
4174 cfg->fc_flags |= RTF_CACHE;
4175
fc1e64e1
DA
4176 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4177
86872cb5 4178 if (tb[RTA_GATEWAY]) {
67b61f6c 4179 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 4180 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 4181 }
30b9819b
DA
4182 if (tb[RTA_VIA]) {
4183 NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
4184 goto errout;
4185 }
86872cb5
TG
4186
4187 if (tb[RTA_DST]) {
4188 int plen = (rtm->rtm_dst_len + 7) >> 3;
4189
4190 if (nla_len(tb[RTA_DST]) < plen)
4191 goto errout;
4192
4193 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 4194 }
86872cb5
TG
4195
4196 if (tb[RTA_SRC]) {
4197 int plen = (rtm->rtm_src_len + 7) >> 3;
4198
4199 if (nla_len(tb[RTA_SRC]) < plen)
4200 goto errout;
4201
4202 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 4203 }
86872cb5 4204
c3968a85 4205 if (tb[RTA_PREFSRC])
67b61f6c 4206 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 4207
86872cb5
TG
4208 if (tb[RTA_OIF])
4209 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4210
4211 if (tb[RTA_PRIORITY])
4212 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4213
4214 if (tb[RTA_METRICS]) {
4215 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4216 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 4217 }
86872cb5
TG
4218
4219 if (tb[RTA_TABLE])
4220 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4221
51ebd318
ND
4222 if (tb[RTA_MULTIPATH]) {
4223 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4224 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
4225
4226 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 4227 cfg->fc_mp_len, extack);
9ed59592
DA
4228 if (err < 0)
4229 goto errout;
51ebd318
ND
4230 }
4231
c78ba6d6
LR
4232 if (tb[RTA_PREF]) {
4233 pref = nla_get_u8(tb[RTA_PREF]);
4234 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4235 pref != ICMPV6_ROUTER_PREF_HIGH)
4236 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4237 cfg->fc_flags |= RTF_PREF(pref);
4238 }
4239
19e42e45
RP
4240 if (tb[RTA_ENCAP])
4241 cfg->fc_encap = tb[RTA_ENCAP];
4242
9ed59592 4243 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
4244 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4245
c255bd68 4246 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
4247 if (err < 0)
4248 goto errout;
4249 }
4250
32bc201e
XL
4251 if (tb[RTA_EXPIRES]) {
4252 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4253
4254 if (addrconf_finite_timeout(timeout)) {
4255 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4256 cfg->fc_flags |= RTF_EXPIRES;
4257 }
4258 }
4259
86872cb5
TG
4260 err = 0;
4261errout:
4262 return err;
1da177e4
LT
4263}
4264
6b9ea5a6 4265struct rt6_nh {
8d1c802b 4266 struct fib6_info *fib6_info;
6b9ea5a6 4267 struct fib6_config r_cfg;
6b9ea5a6
RP
4268 struct list_head next;
4269};
4270
4271static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4272{
4273 struct rt6_nh *nh;
4274
4275 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 4276 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
4277 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4278 nh->r_cfg.fc_ifindex);
4279 }
4280}
4281
d4ead6b3
DA
4282static int ip6_route_info_append(struct net *net,
4283 struct list_head *rt6_nh_list,
8d1c802b
DA
4284 struct fib6_info *rt,
4285 struct fib6_config *r_cfg)
6b9ea5a6
RP
4286{
4287 struct rt6_nh *nh;
6b9ea5a6
RP
4288 int err = -EEXIST;
4289
4290 list_for_each_entry(nh, rt6_nh_list, next) {
8d1c802b
DA
4291 /* check if fib6_info already exists */
4292 if (rt6_duplicate_nexthop(nh->fib6_info, rt))
6b9ea5a6
RP
4293 return err;
4294 }
4295
4296 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4297 if (!nh)
4298 return -ENOMEM;
8d1c802b 4299 nh->fib6_info = rt;
6b9ea5a6
RP
4300 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4301 list_add_tail(&nh->next, rt6_nh_list);
4302
4303 return 0;
4304}
4305
8d1c802b
DA
4306static void ip6_route_mpath_notify(struct fib6_info *rt,
4307 struct fib6_info *rt_last,
3b1137fe
DA
4308 struct nl_info *info,
4309 __u16 nlflags)
4310{
4311 /* if this is an APPEND route, then rt points to the first route
4312 * inserted and rt_last points to last route inserted. Userspace
4313 * wants a consistent dump of the route which starts at the first
4314 * nexthop. Since sibling routes are always added at the end of
4315 * the list, find the first sibling of the last route appended
4316 */
93c2fb25
DA
4317 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4318 rt = list_first_entry(&rt_last->fib6_siblings,
8d1c802b 4319 struct fib6_info,
93c2fb25 4320 fib6_siblings);
3b1137fe
DA
4321 }
4322
4323 if (rt)
4324 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4325}
4326
333c4301
DA
4327static int ip6_route_multipath_add(struct fib6_config *cfg,
4328 struct netlink_ext_ack *extack)
51ebd318 4329{
8d1c802b 4330 struct fib6_info *rt_notif = NULL, *rt_last = NULL;
3b1137fe 4331 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4332 struct fib6_config r_cfg;
4333 struct rtnexthop *rtnh;
8d1c802b 4334 struct fib6_info *rt;
6b9ea5a6
RP
4335 struct rt6_nh *err_nh;
4336 struct rt6_nh *nh, *nh_safe;
3b1137fe 4337 __u16 nlflags;
51ebd318
ND
4338 int remaining;
4339 int attrlen;
6b9ea5a6
RP
4340 int err = 1;
4341 int nhn = 0;
4342 int replace = (cfg->fc_nlinfo.nlh &&
4343 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4344 LIST_HEAD(rt6_nh_list);
51ebd318 4345
3b1137fe
DA
4346 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4347 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4348 nlflags |= NLM_F_APPEND;
4349
35f1b4e9 4350 remaining = cfg->fc_mp_len;
51ebd318 4351 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4352
6b9ea5a6 4353 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
8d1c802b 4354 * fib6_info structs per nexthop
6b9ea5a6 4355 */
51ebd318
ND
4356 while (rtnh_ok(rtnh, remaining)) {
4357 memcpy(&r_cfg, cfg, sizeof(*cfg));
4358 if (rtnh->rtnh_ifindex)
4359 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4360
4361 attrlen = rtnh_attrlen(rtnh);
4362 if (attrlen > 0) {
4363 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4364
4365 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4366 if (nla) {
67b61f6c 4367 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4368 r_cfg.fc_flags |= RTF_GATEWAY;
4369 }
19e42e45
RP
4370 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4371 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4372 if (nla)
4373 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4374 }
6b9ea5a6 4375
68e2ffde 4376 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
acb54e3c 4377 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
8c5b83f0
RP
4378 if (IS_ERR(rt)) {
4379 err = PTR_ERR(rt);
4380 rt = NULL;
6b9ea5a6 4381 goto cleanup;
8c5b83f0 4382 }
b5d2d75e
DA
4383 if (!rt6_qualify_for_ecmp(rt)) {
4384 err = -EINVAL;
4385 NL_SET_ERR_MSG(extack,
4386 "Device only routes can not be added for IPv6 using the multipath API.");
4387 fib6_info_release(rt);
4388 goto cleanup;
4389 }
6b9ea5a6 4390
5e670d84 4391 rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
398958ae 4392
d4ead6b3
DA
4393 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4394 rt, &r_cfg);
51ebd318 4395 if (err) {
93531c67 4396 fib6_info_release(rt);
6b9ea5a6
RP
4397 goto cleanup;
4398 }
4399
4400 rtnh = rtnh_next(rtnh, &remaining);
4401 }
4402
3b1137fe
DA
4403 /* for add and replace send one notification with all nexthops.
4404 * Skip the notification in fib6_add_rt2node and send one with
4405 * the full route when done
4406 */
4407 info->skip_notify = 1;
4408
6b9ea5a6
RP
4409 err_nh = NULL;
4410 list_for_each_entry(nh, &rt6_nh_list, next) {
8d1c802b
DA
4411 err = __ip6_ins_rt(nh->fib6_info, info, extack);
4412 fib6_info_release(nh->fib6_info);
93531c67 4413
f7225172
DA
4414 if (!err) {
4415 /* save reference to last route successfully inserted */
4416 rt_last = nh->fib6_info;
4417
4418 /* save reference to first route for notification */
4419 if (!rt_notif)
4420 rt_notif = nh->fib6_info;
4421 }
3b1137fe 4422
8d1c802b
DA
4423 /* nh->fib6_info is used or freed at this point, reset to NULL*/
4424 nh->fib6_info = NULL;
6b9ea5a6
RP
4425 if (err) {
4426 if (replace && nhn)
4427 ip6_print_replace_route_err(&rt6_nh_list);
4428 err_nh = nh;
4429 goto add_errout;
51ebd318 4430 }
6b9ea5a6 4431
1a72418b 4432 /* Because each route is added like a single route we remove
27596472
MK
4433 * these flags after the first nexthop: if there is a collision,
4434 * we have already failed to add the first nexthop:
4435 * fib6_add_rt2node() has rejected it; when replacing, old
4436 * nexthops have been replaced by first new, the rest should
4437 * be added to it.
1a72418b 4438 */
27596472
MK
4439 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4440 NLM_F_REPLACE);
6b9ea5a6
RP
4441 nhn++;
4442 }
4443
3b1137fe
DA
4444 /* success ... tell user about new route */
4445 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4446 goto cleanup;
4447
4448add_errout:
3b1137fe
DA
4449 /* send notification for routes that were added so that
4450 * the delete notifications sent by ip6_route_del are
4451 * coherent
4452 */
4453 if (rt_notif)
4454 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4455
6b9ea5a6
RP
4456 /* Delete routes that were already added */
4457 list_for_each_entry(nh, &rt6_nh_list, next) {
4458 if (err_nh == nh)
4459 break;
333c4301 4460 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4461 }
4462
4463cleanup:
4464 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
8d1c802b
DA
4465 if (nh->fib6_info)
4466 fib6_info_release(nh->fib6_info);
6b9ea5a6
RP
4467 list_del(&nh->next);
4468 kfree(nh);
4469 }
4470
4471 return err;
4472}
4473
333c4301
DA
4474static int ip6_route_multipath_del(struct fib6_config *cfg,
4475 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4476{
4477 struct fib6_config r_cfg;
4478 struct rtnexthop *rtnh;
4479 int remaining;
4480 int attrlen;
4481 int err = 1, last_err = 0;
4482
4483 remaining = cfg->fc_mp_len;
4484 rtnh = (struct rtnexthop *)cfg->fc_mp;
4485
4486 /* Parse a Multipath Entry */
4487 while (rtnh_ok(rtnh, remaining)) {
4488 memcpy(&r_cfg, cfg, sizeof(*cfg));
4489 if (rtnh->rtnh_ifindex)
4490 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4491
4492 attrlen = rtnh_attrlen(rtnh);
4493 if (attrlen > 0) {
4494 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4495
4496 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4497 if (nla) {
4498 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4499 r_cfg.fc_flags |= RTF_GATEWAY;
4500 }
4501 }
333c4301 4502 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4503 if (err)
4504 last_err = err;
4505
51ebd318
ND
4506 rtnh = rtnh_next(rtnh, &remaining);
4507 }
4508
4509 return last_err;
4510}
4511
c21ef3e3
DA
4512static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4513 struct netlink_ext_ack *extack)
1da177e4 4514{
86872cb5
TG
4515 struct fib6_config cfg;
4516 int err;
1da177e4 4517
333c4301 4518 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4519 if (err < 0)
4520 return err;
4521
51ebd318 4522 if (cfg.fc_mp)
333c4301 4523 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4524 else {
4525 cfg.fc_delete_all_nh = 1;
333c4301 4526 return ip6_route_del(&cfg, extack);
0ae81335 4527 }
1da177e4
LT
4528}
4529
c21ef3e3
DA
4530static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4531 struct netlink_ext_ack *extack)
1da177e4 4532{
86872cb5
TG
4533 struct fib6_config cfg;
4534 int err;
1da177e4 4535
333c4301 4536 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4537 if (err < 0)
4538 return err;
4539
51ebd318 4540 if (cfg.fc_mp)
333c4301 4541 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4542 else
acb54e3c 4543 return ip6_route_add(&cfg, GFP_KERNEL, extack);
1da177e4
LT
4544}
4545
8d1c802b 4546static size_t rt6_nlmsg_size(struct fib6_info *rt)
339bf98f 4547{
beb1afac
DA
4548 int nexthop_len = 0;
4549
93c2fb25 4550 if (rt->fib6_nsiblings) {
beb1afac
DA
4551 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4552 + NLA_ALIGN(sizeof(struct rtnexthop))
4553 + nla_total_size(16) /* RTA_GATEWAY */
5e670d84 4554 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
beb1afac 4555
93c2fb25 4556 nexthop_len *= rt->fib6_nsiblings;
beb1afac
DA
4557 }
4558
339bf98f
TG
4559 return NLMSG_ALIGN(sizeof(struct rtmsg))
4560 + nla_total_size(16) /* RTA_SRC */
4561 + nla_total_size(16) /* RTA_DST */
4562 + nla_total_size(16) /* RTA_GATEWAY */
4563 + nla_total_size(16) /* RTA_PREFSRC */
4564 + nla_total_size(4) /* RTA_TABLE */
4565 + nla_total_size(4) /* RTA_IIF */
4566 + nla_total_size(4) /* RTA_OIF */
4567 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4568 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4569 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4570 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4571 + nla_total_size(1) /* RTA_PREF */
5e670d84 4572 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
beb1afac
DA
4573 + nexthop_len;
4574}
4575
8d1c802b 4576static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
5be083ce 4577 unsigned int *flags, bool skip_oif)
beb1afac 4578{
5e670d84 4579 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
f9d882ea
IS
4580 *flags |= RTNH_F_DEAD;
4581
5e670d84 4582 if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
beb1afac 4583 *flags |= RTNH_F_LINKDOWN;
dcd1f572
DA
4584
4585 rcu_read_lock();
4586 if (fib6_ignore_linkdown(rt))
beb1afac 4587 *flags |= RTNH_F_DEAD;
dcd1f572 4588 rcu_read_unlock();
beb1afac
DA
4589 }
4590
93c2fb25 4591 if (rt->fib6_flags & RTF_GATEWAY) {
5e670d84 4592 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
beb1afac
DA
4593 goto nla_put_failure;
4594 }
4595
5e670d84
DA
4596 *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
4597 if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
4598 *flags |= RTNH_F_OFFLOAD;
4599
5be083ce 4600 /* not needed for multipath encoding b/c it has a rtnexthop struct */
5e670d84
DA
4601 if (!skip_oif && rt->fib6_nh.nh_dev &&
4602 nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
beb1afac
DA
4603 goto nla_put_failure;
4604
5e670d84
DA
4605 if (rt->fib6_nh.nh_lwtstate &&
4606 lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
beb1afac
DA
4607 goto nla_put_failure;
4608
4609 return 0;
4610
4611nla_put_failure:
4612 return -EMSGSIZE;
4613}
4614
5be083ce 4615/* add multipath next hop */
8d1c802b 4616static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
beb1afac 4617{
5e670d84 4618 const struct net_device *dev = rt->fib6_nh.nh_dev;
beb1afac
DA
4619 struct rtnexthop *rtnh;
4620 unsigned int flags = 0;
4621
4622 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4623 if (!rtnh)
4624 goto nla_put_failure;
4625
5e670d84
DA
4626 rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
4627 rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
beb1afac 4628
5be083ce 4629 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
4630 goto nla_put_failure;
4631
4632 rtnh->rtnh_flags = flags;
4633
4634 /* length of rtnetlink header + attributes */
4635 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4636
4637 return 0;
4638
4639nla_put_failure:
4640 return -EMSGSIZE;
339bf98f
TG
4641}
4642
d4ead6b3 4643static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 4644 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 4645 struct in6_addr *dest, struct in6_addr *src,
15e47304 4646 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4647 unsigned int flags)
1da177e4 4648{
22d0bd82
XL
4649 struct rt6_info *rt6 = (struct rt6_info *)dst;
4650 struct rt6key *rt6_dst, *rt6_src;
4651 u32 *pmetrics, table, rt6_flags;
2d7202bf 4652 struct nlmsghdr *nlh;
22d0bd82 4653 struct rtmsg *rtm;
d4ead6b3 4654 long expires = 0;
1da177e4 4655
15e47304 4656 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4657 if (!nlh)
26932566 4658 return -EMSGSIZE;
2d7202bf 4659
22d0bd82
XL
4660 if (rt6) {
4661 rt6_dst = &rt6->rt6i_dst;
4662 rt6_src = &rt6->rt6i_src;
4663 rt6_flags = rt6->rt6i_flags;
4664 } else {
4665 rt6_dst = &rt->fib6_dst;
4666 rt6_src = &rt->fib6_src;
4667 rt6_flags = rt->fib6_flags;
4668 }
4669
2d7202bf 4670 rtm = nlmsg_data(nlh);
1da177e4 4671 rtm->rtm_family = AF_INET6;
22d0bd82
XL
4672 rtm->rtm_dst_len = rt6_dst->plen;
4673 rtm->rtm_src_len = rt6_src->plen;
1da177e4 4674 rtm->rtm_tos = 0;
93c2fb25
DA
4675 if (rt->fib6_table)
4676 table = rt->fib6_table->tb6_id;
c71099ac 4677 else
9e762a4a 4678 table = RT6_TABLE_UNSPEC;
53ad6977 4679 rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
c78679e8
DM
4680 if (nla_put_u32(skb, RTA_TABLE, table))
4681 goto nla_put_failure;
e8478e80
DA
4682
4683 rtm->rtm_type = rt->fib6_type;
1da177e4
LT
4684 rtm->rtm_flags = 0;
4685 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
93c2fb25 4686 rtm->rtm_protocol = rt->fib6_protocol;
1da177e4 4687
22d0bd82 4688 if (rt6_flags & RTF_CACHE)
1da177e4
LT
4689 rtm->rtm_flags |= RTM_F_CLONED;
4690
d4ead6b3
DA
4691 if (dest) {
4692 if (nla_put_in6_addr(skb, RTA_DST, dest))
c78679e8 4693 goto nla_put_failure;
1ab1457c 4694 rtm->rtm_dst_len = 128;
1da177e4 4695 } else if (rtm->rtm_dst_len)
22d0bd82 4696 if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
c78679e8 4697 goto nla_put_failure;
1da177e4
LT
4698#ifdef CONFIG_IPV6_SUBTREES
4699 if (src) {
930345ea 4700 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4701 goto nla_put_failure;
1ab1457c 4702 rtm->rtm_src_len = 128;
c78679e8 4703 } else if (rtm->rtm_src_len &&
22d0bd82 4704 nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
c78679e8 4705 goto nla_put_failure;
1da177e4 4706#endif
7bc570c8
YH
4707 if (iif) {
4708#ifdef CONFIG_IPV6_MROUTE
22d0bd82 4709 if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
fd61c6ba
DA
4710 int err = ip6mr_get_route(net, skb, rtm, portid);
4711
4712 if (err == 0)
4713 return 0;
4714 if (err < 0)
4715 goto nla_put_failure;
7bc570c8
YH
4716 } else
4717#endif
c78679e8
DM
4718 if (nla_put_u32(skb, RTA_IIF, iif))
4719 goto nla_put_failure;
d4ead6b3 4720 } else if (dest) {
1da177e4 4721 struct in6_addr saddr_buf;
d4ead6b3 4722 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
930345ea 4723 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4724 goto nla_put_failure;
1da177e4 4725 }
2d7202bf 4726
93c2fb25 4727 if (rt->fib6_prefsrc.plen) {
c3968a85 4728 struct in6_addr saddr_buf;
93c2fb25 4729 saddr_buf = rt->fib6_prefsrc.addr;
930345ea 4730 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4731 goto nla_put_failure;
c3968a85
DW
4732 }
4733
d4ead6b3
DA
4734 pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4735 if (rtnetlink_put_metrics(skb, pmetrics) < 0)
2d7202bf
TG
4736 goto nla_put_failure;
4737
93c2fb25 4738 if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
c78679e8 4739 goto nla_put_failure;
8253947e 4740
beb1afac
DA
4741 /* For multipath routes, walk the siblings list and add
4742 * each as a nexthop within RTA_MULTIPATH.
4743 */
22d0bd82
XL
4744 if (rt6) {
4745 if (rt6_flags & RTF_GATEWAY &&
4746 nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
4747 goto nla_put_failure;
4748
4749 if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
4750 goto nla_put_failure;
4751 } else if (rt->fib6_nsiblings) {
8d1c802b 4752 struct fib6_info *sibling, *next_sibling;
beb1afac
DA
4753 struct nlattr *mp;
4754
4755 mp = nla_nest_start(skb, RTA_MULTIPATH);
4756 if (!mp)
4757 goto nla_put_failure;
4758
4759 if (rt6_add_nexthop(skb, rt) < 0)
4760 goto nla_put_failure;
4761
4762 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25 4763 &rt->fib6_siblings, fib6_siblings) {
beb1afac
DA
4764 if (rt6_add_nexthop(skb, sibling) < 0)
4765 goto nla_put_failure;
4766 }
4767
4768 nla_nest_end(skb, mp);
4769 } else {
5be083ce 4770 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
4771 goto nla_put_failure;
4772 }
4773
22d0bd82 4774 if (rt6_flags & RTF_EXPIRES) {
14895687
DA
4775 expires = dst ? dst->expires : rt->expires;
4776 expires -= jiffies;
4777 }
69cdf8f9 4778
d4ead6b3 4779 if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
e3703b3d 4780 goto nla_put_failure;
2d7202bf 4781
22d0bd82 4782 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
c78ba6d6
LR
4783 goto nla_put_failure;
4784
19e42e45 4785
053c095a
JB
4786 nlmsg_end(skb, nlh);
4787 return 0;
2d7202bf
TG
4788
4789nla_put_failure:
26932566
PM
4790 nlmsg_cancel(skb, nlh);
4791 return -EMSGSIZE;
1da177e4
LT
4792}
4793
13e38901
DA
4794static bool fib6_info_uses_dev(const struct fib6_info *f6i,
4795 const struct net_device *dev)
4796{
4797 if (f6i->fib6_nh.nh_dev == dev)
4798 return true;
4799
4800 if (f6i->fib6_nsiblings) {
4801 struct fib6_info *sibling, *next_sibling;
4802
4803 list_for_each_entry_safe(sibling, next_sibling,
4804 &f6i->fib6_siblings, fib6_siblings) {
4805 if (sibling->fib6_nh.nh_dev == dev)
4806 return true;
4807 }
4808 }
4809
4810 return false;
4811}
4812
8d1c802b 4813int rt6_dump_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4814{
4815 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
13e38901
DA
4816 struct fib_dump_filter *filter = &arg->filter;
4817 unsigned int flags = NLM_F_MULTI;
1f17e2f2
DA
4818 struct net *net = arg->net;
4819
421842ed 4820 if (rt == net->ipv6.fib6_null_entry)
1f17e2f2 4821 return 0;
1da177e4 4822
13e38901
DA
4823 if ((filter->flags & RTM_F_PREFIX) &&
4824 !(rt->fib6_flags & RTF_PREFIX_RT)) {
4825 /* success since this is not a prefix route */
4826 return 1;
4827 }
4828 if (filter->filter_set) {
4829 if ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
4830 (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
4831 (filter->protocol && rt->fib6_protocol != filter->protocol)) {
f8cfe2ce
DA
4832 return 1;
4833 }
13e38901 4834 flags |= NLM_F_DUMP_FILTERED;
f8cfe2ce 4835 }
1da177e4 4836
d4ead6b3
DA
4837 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4838 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
13e38901 4839 arg->cb->nlh->nlmsg_seq, flags);
1da177e4
LT
4840}
4841
c21ef3e3
DA
4842static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4843 struct netlink_ext_ack *extack)
1da177e4 4844{
3b1e0a65 4845 struct net *net = sock_net(in_skb->sk);
ab364a6f 4846 struct nlattr *tb[RTA_MAX+1];
18c3a61c 4847 int err, iif = 0, oif = 0;
a68886a6 4848 struct fib6_info *from;
18c3a61c 4849 struct dst_entry *dst;
ab364a6f 4850 struct rt6_info *rt;
1da177e4 4851 struct sk_buff *skb;
ab364a6f 4852 struct rtmsg *rtm;
744486d4 4853 struct flowi6 fl6 = {};
18c3a61c 4854 bool fibmatch;
1da177e4 4855
fceb6435 4856 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 4857 extack);
ab364a6f
TG
4858 if (err < 0)
4859 goto errout;
1da177e4 4860
ab364a6f 4861 err = -EINVAL;
38b7097b
HFS
4862 rtm = nlmsg_data(nlh);
4863 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4864 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4865
ab364a6f
TG
4866 if (tb[RTA_SRC]) {
4867 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4868 goto errout;
4869
4e3fd7a0 4870 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4871 }
4872
4873 if (tb[RTA_DST]) {
4874 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4875 goto errout;
4876
4e3fd7a0 4877 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4878 }
4879
4880 if (tb[RTA_IIF])
4881 iif = nla_get_u32(tb[RTA_IIF]);
4882
4883 if (tb[RTA_OIF])
72331bc0 4884 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4885
2e47b291
LC
4886 if (tb[RTA_MARK])
4887 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4888
622ec2c9
LC
4889 if (tb[RTA_UID])
4890 fl6.flowi6_uid = make_kuid(current_user_ns(),
4891 nla_get_u32(tb[RTA_UID]));
4892 else
4893 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4894
eacb9384
RP
4895 if (tb[RTA_SPORT])
4896 fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
4897
4898 if (tb[RTA_DPORT])
4899 fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
4900
4901 if (tb[RTA_IP_PROTO]) {
4902 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
b17fa794
HL
4903 &fl6.flowi6_proto, AF_INET6,
4904 extack);
eacb9384
RP
4905 if (err)
4906 goto errout;
4907 }
4908
1da177e4
LT
4909 if (iif) {
4910 struct net_device *dev;
72331bc0
SL
4911 int flags = 0;
4912
121622db
FW
4913 rcu_read_lock();
4914
4915 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4916 if (!dev) {
121622db 4917 rcu_read_unlock();
1da177e4 4918 err = -ENODEV;
ab364a6f 4919 goto errout;
1da177e4 4920 }
72331bc0
SL
4921
4922 fl6.flowi6_iif = iif;
4923
4924 if (!ipv6_addr_any(&fl6.saddr))
4925 flags |= RT6_LOOKUP_F_HAS_SADDR;
4926
b75cc8f9 4927 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
121622db
FW
4928
4929 rcu_read_unlock();
72331bc0
SL
4930 } else {
4931 fl6.flowi6_oif = oif;
4932
58acfd71 4933 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
4934 }
4935
18c3a61c
RP
4936
4937 rt = container_of(dst, struct rt6_info, dst);
4938 if (rt->dst.error) {
4939 err = rt->dst.error;
4940 ip6_rt_put(rt);
4941 goto errout;
1da177e4
LT
4942 }
4943
9d6acb3b
WC
4944 if (rt == net->ipv6.ip6_null_entry) {
4945 err = rt->dst.error;
4946 ip6_rt_put(rt);
4947 goto errout;
4948 }
4949
ab364a6f 4950 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4951 if (!skb) {
94e187c0 4952 ip6_rt_put(rt);
ab364a6f
TG
4953 err = -ENOBUFS;
4954 goto errout;
4955 }
1da177e4 4956
d8d1f30b 4957 skb_dst_set(skb, &rt->dst);
a68886a6
DA
4958
4959 rcu_read_lock();
4960 from = rcu_dereference(rt->from);
4961
18c3a61c 4962 if (fibmatch)
a68886a6 4963 err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
18c3a61c
RP
4964 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4965 nlh->nlmsg_seq, 0);
4966 else
a68886a6
DA
4967 err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
4968 &fl6.saddr, iif, RTM_NEWROUTE,
d4ead6b3
DA
4969 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
4970 0);
a68886a6
DA
4971 rcu_read_unlock();
4972
1da177e4 4973 if (err < 0) {
ab364a6f
TG
4974 kfree_skb(skb);
4975 goto errout;
1da177e4
LT
4976 }
4977
15e47304 4978 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 4979errout:
1da177e4 4980 return err;
1da177e4
LT
4981}
4982
8d1c802b 4983void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
37a1d361 4984 unsigned int nlm_flags)
1da177e4
LT
4985{
4986 struct sk_buff *skb;
5578689a 4987 struct net *net = info->nl_net;
528c4ceb
DL
4988 u32 seq;
4989 int err;
4990
4991 err = -ENOBUFS;
38308473 4992 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 4993
19e42e45 4994 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 4995 if (!skb)
21713ebc
TG
4996 goto errout;
4997
d4ead6b3
DA
4998 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
4999 event, info->portid, seq, nlm_flags);
26932566
PM
5000 if (err < 0) {
5001 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
5002 WARN_ON(err == -EMSGSIZE);
5003 kfree_skb(skb);
5004 goto errout;
5005 }
15e47304 5006 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
5007 info->nlh, gfp_any());
5008 return;
21713ebc
TG
5009errout:
5010 if (err < 0)
5578689a 5011 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
5012}
5013
8ed67789 5014static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 5015 unsigned long event, void *ptr)
8ed67789 5016{
351638e7 5017 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 5018 struct net *net = dev_net(dev);
8ed67789 5019
242d3a49
WC
5020 if (!(dev->flags & IFF_LOOPBACK))
5021 return NOTIFY_OK;
5022
5023 if (event == NETDEV_REGISTER) {
421842ed 5024 net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
d8d1f30b 5025 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
5026 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
5027#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 5028 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 5029 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 5030 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 5031 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 5032#endif
76da0704
WC
5033 } else if (event == NETDEV_UNREGISTER &&
5034 dev->reg_state != NETREG_UNREGISTERED) {
5035 /* NETDEV_UNREGISTER could be fired for multiple times by
5036 * netdev_wait_allrefs(). Make sure we only call this once.
5037 */
12d94a80 5038 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 5039#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
5040 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
5041 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
5042#endif
5043 }
5044
5045 return NOTIFY_OK;
5046}
5047
1da177e4
LT
5048/*
5049 * /proc
5050 */
5051
5052#ifdef CONFIG_PROC_FS
1da177e4
LT
5053static int rt6_stats_seq_show(struct seq_file *seq, void *v)
5054{
69ddb805 5055 struct net *net = (struct net *)seq->private;
1da177e4 5056 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
5057 net->ipv6.rt6_stats->fib_nodes,
5058 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 5059 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
5060 net->ipv6.rt6_stats->fib_rt_entries,
5061 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 5062 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 5063 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
5064
5065 return 0;
5066}
1da177e4
LT
5067#endif /* CONFIG_PROC_FS */
5068
5069#ifdef CONFIG_SYSCTL
5070
1da177e4 5071static
fe2c6338 5072int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
5073 void __user *buffer, size_t *lenp, loff_t *ppos)
5074{
c486da34
LAG
5075 struct net *net;
5076 int delay;
5077 if (!write)
1da177e4 5078 return -EINVAL;
c486da34
LAG
5079
5080 net = (struct net *)ctl->extra1;
5081 delay = net->ipv6.sysctl.flush_delay;
5082 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 5083 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 5084 return 0;
1da177e4
LT
5085}
5086
7c6bb7d2
DA
5087static int zero;
5088static int one = 1;
5089
ed792e28 5090static struct ctl_table ipv6_route_table_template[] = {
1ab1457c 5091 {
1da177e4 5092 .procname = "flush",
4990509f 5093 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 5094 .maxlen = sizeof(int),
89c8b3a1 5095 .mode = 0200,
6d9f239a 5096 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
5097 },
5098 {
1da177e4 5099 .procname = "gc_thresh",
9a7ec3a9 5100 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
5101 .maxlen = sizeof(int),
5102 .mode = 0644,
6d9f239a 5103 .proc_handler = proc_dointvec,
1da177e4
LT
5104 },
5105 {
1da177e4 5106 .procname = "max_size",
4990509f 5107 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
5108 .maxlen = sizeof(int),
5109 .mode = 0644,
6d9f239a 5110 .proc_handler = proc_dointvec,
1da177e4
LT
5111 },
5112 {
1da177e4 5113 .procname = "gc_min_interval",
4990509f 5114 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5115 .maxlen = sizeof(int),
5116 .mode = 0644,
6d9f239a 5117 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5118 },
5119 {
1da177e4 5120 .procname = "gc_timeout",
4990509f 5121 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
5122 .maxlen = sizeof(int),
5123 .mode = 0644,
6d9f239a 5124 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5125 },
5126 {
1da177e4 5127 .procname = "gc_interval",
4990509f 5128 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
5129 .maxlen = sizeof(int),
5130 .mode = 0644,
6d9f239a 5131 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5132 },
5133 {
1da177e4 5134 .procname = "gc_elasticity",
4990509f 5135 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
5136 .maxlen = sizeof(int),
5137 .mode = 0644,
f3d3f616 5138 .proc_handler = proc_dointvec,
1da177e4
LT
5139 },
5140 {
1da177e4 5141 .procname = "mtu_expires",
4990509f 5142 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
5143 .maxlen = sizeof(int),
5144 .mode = 0644,
6d9f239a 5145 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5146 },
5147 {
1da177e4 5148 .procname = "min_adv_mss",
4990509f 5149 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
5150 .maxlen = sizeof(int),
5151 .mode = 0644,
f3d3f616 5152 .proc_handler = proc_dointvec,
1da177e4
LT
5153 },
5154 {
1da177e4 5155 .procname = "gc_min_interval_ms",
4990509f 5156 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5157 .maxlen = sizeof(int),
5158 .mode = 0644,
6d9f239a 5159 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 5160 },
7c6bb7d2
DA
5161 {
5162 .procname = "skip_notify_on_dev_down",
5163 .data = &init_net.ipv6.sysctl.skip_notify_on_dev_down,
5164 .maxlen = sizeof(int),
5165 .mode = 0644,
5166 .proc_handler = proc_dointvec,
5167 .extra1 = &zero,
5168 .extra2 = &one,
5169 },
f8572d8f 5170 { }
1da177e4
LT
5171};
5172
2c8c1e72 5173struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
5174{
5175 struct ctl_table *table;
5176
5177 table = kmemdup(ipv6_route_table_template,
5178 sizeof(ipv6_route_table_template),
5179 GFP_KERNEL);
5ee09105
YH
5180
5181 if (table) {
5182 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 5183 table[0].extra1 = net;
86393e52 5184 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
5185 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5186 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5187 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5188 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5189 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5190 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5191 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 5192 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
7c6bb7d2 5193 table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
464dc801
EB
5194
5195 /* Don't export sysctls to unprivileged users */
5196 if (net->user_ns != &init_user_ns)
5197 table[0].procname = NULL;
5ee09105
YH
5198 }
5199
760f2d01
DL
5200 return table;
5201}
1da177e4
LT
5202#endif
5203
2c8c1e72 5204static int __net_init ip6_route_net_init(struct net *net)
cdb18761 5205{
633d424b 5206 int ret = -ENOMEM;
8ed67789 5207
86393e52
AD
5208 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5209 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 5210
fc66f95c
ED
5211 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5212 goto out_ip6_dst_ops;
5213
421842ed
DA
5214 net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5215 sizeof(*net->ipv6.fib6_null_entry),
5216 GFP_KERNEL);
5217 if (!net->ipv6.fib6_null_entry)
5218 goto out_ip6_dst_entries;
5219
8ed67789
DL
5220 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5221 sizeof(*net->ipv6.ip6_null_entry),
5222 GFP_KERNEL);
5223 if (!net->ipv6.ip6_null_entry)
421842ed 5224 goto out_fib6_null_entry;
d8d1f30b 5225 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5226 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5227 ip6_template_metrics, true);
8ed67789
DL
5228
5229#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 5230 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
5231 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5232 sizeof(*net->ipv6.ip6_prohibit_entry),
5233 GFP_KERNEL);
68fffc67
PZ
5234 if (!net->ipv6.ip6_prohibit_entry)
5235 goto out_ip6_null_entry;
d8d1f30b 5236 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5237 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5238 ip6_template_metrics, true);
8ed67789
DL
5239
5240 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5241 sizeof(*net->ipv6.ip6_blk_hole_entry),
5242 GFP_KERNEL);
68fffc67
PZ
5243 if (!net->ipv6.ip6_blk_hole_entry)
5244 goto out_ip6_prohibit_entry;
d8d1f30b 5245 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5246 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5247 ip6_template_metrics, true);
8ed67789
DL
5248#endif
5249
b339a47c
PZ
5250 net->ipv6.sysctl.flush_delay = 0;
5251 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5252 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5253 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5254 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5255 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5256 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5257 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
7c6bb7d2 5258 net->ipv6.sysctl.skip_notify_on_dev_down = 0;
b339a47c 5259
6891a346
BT
5260 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5261
8ed67789
DL
5262 ret = 0;
5263out:
5264 return ret;
f2fc6a54 5265
68fffc67
PZ
5266#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5267out_ip6_prohibit_entry:
5268 kfree(net->ipv6.ip6_prohibit_entry);
5269out_ip6_null_entry:
5270 kfree(net->ipv6.ip6_null_entry);
5271#endif
421842ed
DA
5272out_fib6_null_entry:
5273 kfree(net->ipv6.fib6_null_entry);
fc66f95c
ED
5274out_ip6_dst_entries:
5275 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 5276out_ip6_dst_ops:
f2fc6a54 5277 goto out;
cdb18761
DL
5278}
5279
2c8c1e72 5280static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 5281{
421842ed 5282 kfree(net->ipv6.fib6_null_entry);
8ed67789
DL
5283 kfree(net->ipv6.ip6_null_entry);
5284#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5285 kfree(net->ipv6.ip6_prohibit_entry);
5286 kfree(net->ipv6.ip6_blk_hole_entry);
5287#endif
41bb78b4 5288 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
5289}
5290
d189634e
TG
5291static int __net_init ip6_route_net_init_late(struct net *net)
5292{
5293#ifdef CONFIG_PROC_FS
c3506372
CH
5294 proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
5295 sizeof(struct ipv6_route_iter));
3617d949
CH
5296 proc_create_net_single("rt6_stats", 0444, net->proc_net,
5297 rt6_stats_seq_show, NULL);
d189634e
TG
5298#endif
5299 return 0;
5300}
5301
5302static void __net_exit ip6_route_net_exit_late(struct net *net)
5303{
5304#ifdef CONFIG_PROC_FS
ece31ffd
G
5305 remove_proc_entry("ipv6_route", net->proc_net);
5306 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
5307#endif
5308}
5309
cdb18761
DL
5310static struct pernet_operations ip6_route_net_ops = {
5311 .init = ip6_route_net_init,
5312 .exit = ip6_route_net_exit,
5313};
5314
c3426b47
DM
5315static int __net_init ipv6_inetpeer_init(struct net *net)
5316{
5317 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5318
5319 if (!bp)
5320 return -ENOMEM;
5321 inet_peer_base_init(bp);
5322 net->ipv6.peers = bp;
5323 return 0;
5324}
5325
5326static void __net_exit ipv6_inetpeer_exit(struct net *net)
5327{
5328 struct inet_peer_base *bp = net->ipv6.peers;
5329
5330 net->ipv6.peers = NULL;
56a6b248 5331 inetpeer_invalidate_tree(bp);
c3426b47
DM
5332 kfree(bp);
5333}
5334
2b823f72 5335static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
5336 .init = ipv6_inetpeer_init,
5337 .exit = ipv6_inetpeer_exit,
5338};
5339
d189634e
TG
5340static struct pernet_operations ip6_route_net_late_ops = {
5341 .init = ip6_route_net_init_late,
5342 .exit = ip6_route_net_exit_late,
5343};
5344
8ed67789
DL
5345static struct notifier_block ip6_route_dev_notifier = {
5346 .notifier_call = ip6_route_dev_notify,
242d3a49 5347 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5348};
5349
2f460933
WC
5350void __init ip6_route_init_special_entries(void)
5351{
5352 /* Registering of the loopback is done before this portion of code,
5353 * the loopback reference in rt6_info will not be taken, do it
5354 * manually for init_net */
421842ed 5355 init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
2f460933
WC
5356 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5357 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5358 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5359 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5360 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5361 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5362 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5363 #endif
5364}
5365
433d49c3 5366int __init ip6_route_init(void)
1da177e4 5367{
433d49c3 5368 int ret;
8d0b94af 5369 int cpu;
433d49c3 5370
9a7ec3a9
DL
5371 ret = -ENOMEM;
5372 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5373 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5374 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5375 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5376 goto out;
14e50e57 5377
fc66f95c 5378 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5379 if (ret)
bdb3289f 5380 goto out_kmem_cache;
bdb3289f 5381
c3426b47
DM
5382 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5383 if (ret)
e8803b6c 5384 goto out_dst_entries;
2a0c451a 5385
7e52b33b
DM
5386 ret = register_pernet_subsys(&ip6_route_net_ops);
5387 if (ret)
5388 goto out_register_inetpeer;
c3426b47 5389
5dc121e9
AE
5390 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5391
e8803b6c 5392 ret = fib6_init();
433d49c3 5393 if (ret)
8ed67789 5394 goto out_register_subsys;
433d49c3 5395
433d49c3
DL
5396 ret = xfrm6_init();
5397 if (ret)
e8803b6c 5398 goto out_fib6_init;
c35b7e72 5399
433d49c3
DL
5400 ret = fib6_rules_init();
5401 if (ret)
5402 goto xfrm6_init;
7e5449c2 5403
d189634e
TG
5404 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5405 if (ret)
5406 goto fib6_rules_init;
5407
16feebcf
FW
5408 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5409 inet6_rtm_newroute, NULL, 0);
5410 if (ret < 0)
5411 goto out_register_late_subsys;
5412
5413 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5414 inet6_rtm_delroute, NULL, 0);
5415 if (ret < 0)
5416 goto out_register_late_subsys;
5417
5418 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5419 inet6_rtm_getroute, NULL,
5420 RTNL_FLAG_DOIT_UNLOCKED);
5421 if (ret < 0)
d189634e 5422 goto out_register_late_subsys;
c127ea2c 5423
8ed67789 5424 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5425 if (ret)
d189634e 5426 goto out_register_late_subsys;
8ed67789 5427
8d0b94af
MKL
5428 for_each_possible_cpu(cpu) {
5429 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5430
5431 INIT_LIST_HEAD(&ul->head);
5432 spin_lock_init(&ul->lock);
5433 }
5434
433d49c3
DL
5435out:
5436 return ret;
5437
d189634e 5438out_register_late_subsys:
16feebcf 5439 rtnl_unregister_all(PF_INET6);
d189634e 5440 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5441fib6_rules_init:
433d49c3
DL
5442 fib6_rules_cleanup();
5443xfrm6_init:
433d49c3 5444 xfrm6_fini();
2a0c451a
TG
5445out_fib6_init:
5446 fib6_gc_cleanup();
8ed67789
DL
5447out_register_subsys:
5448 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5449out_register_inetpeer:
5450 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5451out_dst_entries:
5452 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5453out_kmem_cache:
f2fc6a54 5454 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5455 goto out;
1da177e4
LT
5456}
5457
5458void ip6_route_cleanup(void)
5459{
8ed67789 5460 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5461 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5462 fib6_rules_cleanup();
1da177e4 5463 xfrm6_fini();
1da177e4 5464 fib6_gc_cleanup();
c3426b47 5465 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5466 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5467 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5468 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5469}