]> git.ipfire.org Git - thirdparty/kernel/stable.git/blame - net/ipv6/route.c
net: inet_rtm_getroute() - use new style struct initializer instead of memset
[thirdparty/kernel/stable.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
eacb9384 66#include <net/ip.h>
7c0f6ba6 67#include <linux/uaccess.h>
1da177e4
LT
68
69#ifdef CONFIG_SYSCTL
70#include <linux/sysctl.h>
71#endif
72
30d444d3
DA
73static int ip6_rt_type_to_error(u8 fib6_type);
74
75#define CREATE_TRACE_POINTS
76#include <trace/events/fib6.h>
77EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
78#undef CREATE_TRACE_POINTS
79
/* Outcome of the next-hop neighbour check used while scoring routes.
 * Every failure value is negative so callers can simply test "< 0".
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,	/* find_match() skips the route */
	RT6_NUD_FAIL_PROBE	= -2,	/* neighbour entry is NUD_FAILED */
	RT6_NUD_FAIL_DO_RR	= -1,	/* find_match() asks for round-robin */
	RT6_NUD_SUCCEED		= 1
};
86
1da177e4 87static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 88static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 89static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
569d3645 94static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
95
96static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 97static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 98static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 99static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 100static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
101static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
102 struct sk_buff *skb, u32 mtu);
103static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
104 struct sk_buff *skb);
8d1c802b
DA
105static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
106static size_t rt6_nlmsg_size(struct fib6_info *rt);
d4ead6b3 107static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 108 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 109 struct in6_addr *dest, struct in6_addr *src,
16a16cd3
DA
110 int iif, int type, u32 portid, u32 seq,
111 unsigned int flags);
8d1c802b 112static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
113 struct in6_addr *daddr,
114 struct in6_addr *saddr);
1da177e4 115
70ceb4f5 116#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 117static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 118 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
119 const struct in6_addr *gwaddr,
120 struct net_device *dev,
95c96174 121 unsigned int pref);
8d1c802b 122static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 123 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
124 const struct in6_addr *gwaddr,
125 struct net_device *dev);
70ceb4f5
YH
126#endif
127
8d0b94af
MKL
128struct uncached_list {
129 spinlock_t lock;
130 struct list_head head;
131};
132
133static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
134
510c321b 135void rt6_uncached_list_add(struct rt6_info *rt)
8d0b94af
MKL
136{
137 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
138
8d0b94af
MKL
139 rt->rt6i_uncached_list = ul;
140
141 spin_lock_bh(&ul->lock);
142 list_add_tail(&rt->rt6i_uncached, &ul->head);
143 spin_unlock_bh(&ul->lock);
144}
145
510c321b 146void rt6_uncached_list_del(struct rt6_info *rt)
8d0b94af
MKL
147{
148 if (!list_empty(&rt->rt6i_uncached)) {
149 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 150 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
151
152 spin_lock_bh(&ul->lock);
153 list_del(&rt->rt6i_uncached);
81eb8447 154 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
155 spin_unlock_bh(&ul->lock);
156 }
157}
158
159static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
160{
161 struct net_device *loopback_dev = net->loopback_dev;
162 int cpu;
163
e332bc67
EB
164 if (dev == loopback_dev)
165 return;
166
8d0b94af
MKL
167 for_each_possible_cpu(cpu) {
168 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
169 struct rt6_info *rt;
170
171 spin_lock_bh(&ul->lock);
172 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
173 struct inet6_dev *rt_idev = rt->rt6i_idev;
174 struct net_device *rt_dev = rt->dst.dev;
175
e332bc67 176 if (rt_idev->dev == dev) {
8d0b94af
MKL
177 rt->rt6i_idev = in6_dev_get(loopback_dev);
178 in6_dev_put(rt_idev);
179 }
180
e332bc67 181 if (rt_dev == dev) {
8d0b94af
MKL
182 rt->dst.dev = loopback_dev;
183 dev_hold(rt->dst.dev);
184 dev_put(rt_dev);
185 }
186 }
187 spin_unlock_bh(&ul->lock);
188 }
189}
190
f8a1b43b 191static inline const void *choose_neigh_daddr(const struct in6_addr *p,
f894cbf8
DM
192 struct sk_buff *skb,
193 const void *daddr)
39232973 194{
a7563f34 195 if (!ipv6_addr_any(p))
39232973 196 return (const void *) p;
f894cbf8
DM
197 else if (skb)
198 return &ipv6_hdr(skb)->daddr;
39232973
DM
199 return daddr;
200}
201
f8a1b43b
DA
202struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
203 struct net_device *dev,
204 struct sk_buff *skb,
205 const void *daddr)
d3aaeb38 206{
39232973
DM
207 struct neighbour *n;
208
f8a1b43b
DA
209 daddr = choose_neigh_daddr(gw, skb, daddr);
210 n = __ipv6_neigh_lookup(dev, daddr);
f83c7790
DM
211 if (n)
212 return n;
f8a1b43b
DA
213 return neigh_create(&nd_tbl, daddr, dev);
214}
215
216static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
217 struct sk_buff *skb,
218 const void *daddr)
219{
220 const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
221
222 return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
f83c7790
DM
223}
224
63fca65d
JA
225static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
226{
227 struct net_device *dev = dst->dev;
228 struct rt6_info *rt = (struct rt6_info *)dst;
229
f8a1b43b 230 daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
63fca65d
JA
231 if (!daddr)
232 return;
233 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
234 return;
235 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
236 return;
237 __ipv6_confirm_neigh(dev, daddr);
238}
239
9a7ec3a9 240static struct dst_ops ip6_dst_ops_template = {
1da177e4 241 .family = AF_INET6,
1da177e4
LT
242 .gc = ip6_dst_gc,
243 .gc_thresh = 1024,
244 .check = ip6_dst_check,
0dbaee3b 245 .default_advmss = ip6_default_advmss,
ebb762f2 246 .mtu = ip6_mtu,
d4ead6b3 247 .cow_metrics = dst_cow_metrics_generic,
1da177e4
LT
248 .destroy = ip6_dst_destroy,
249 .ifdown = ip6_dst_ifdown,
250 .negative_advice = ip6_negative_advice,
251 .link_failure = ip6_link_failure,
252 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 253 .redirect = rt6_do_redirect,
9f8955cc 254 .local_out = __ip6_local_out,
f8a1b43b 255 .neigh_lookup = ip6_dst_neigh_lookup,
63fca65d 256 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
257};
258
ebb762f2 259static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 260{
618f9bc7
SK
261 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
262
263 return mtu ? : dst->dev->mtu;
ec831ea7
RD
264}
265
6700c270
DM
266static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
267 struct sk_buff *skb, u32 mtu)
14e50e57
DM
268{
269}
270
6700c270
DM
/* dst_ops->redirect for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* no-op */
}
275
14e50e57
DM
276static struct dst_ops ip6_dst_blackhole_ops = {
277 .family = AF_INET6,
14e50e57
DM
278 .destroy = ip6_dst_destroy,
279 .check = ip6_dst_check,
ebb762f2 280 .mtu = ip6_blackhole_mtu,
214f45c9 281 .default_advmss = ip6_default_advmss,
14e50e57 282 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 283 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 284 .cow_metrics = dst_cow_metrics_generic,
f8a1b43b 285 .neigh_lookup = ip6_dst_neigh_lookup,
14e50e57
DM
286};
287
62fa8a84 288static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 289 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
290};
291
8d1c802b 292static const struct fib6_info fib6_null_entry_template = {
93c2fb25
DA
293 .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP),
294 .fib6_protocol = RTPROT_KERNEL,
295 .fib6_metric = ~(u32)0,
296 .fib6_ref = ATOMIC_INIT(1),
421842ed
DA
297 .fib6_type = RTN_UNREACHABLE,
298 .fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
299};
300
fb0af4c7 301static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
302 .dst = {
303 .__refcnt = ATOMIC_INIT(1),
304 .__use = 1,
2c20cbd7 305 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 306 .error = -ENETUNREACH,
d8d1f30b
CG
307 .input = ip6_pkt_discard,
308 .output = ip6_pkt_discard_out,
1da177e4
LT
309 },
310 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
1da177e4
LT
311};
312
101367c2
TG
313#ifdef CONFIG_IPV6_MULTIPLE_TABLES
314
fb0af4c7 315static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
316 .dst = {
317 .__refcnt = ATOMIC_INIT(1),
318 .__use = 1,
2c20cbd7 319 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 320 .error = -EACCES,
d8d1f30b
CG
321 .input = ip6_pkt_prohibit,
322 .output = ip6_pkt_prohibit_out,
101367c2
TG
323 },
324 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
325};
326
fb0af4c7 327static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
328 .dst = {
329 .__refcnt = ATOMIC_INIT(1),
330 .__use = 1,
2c20cbd7 331 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 332 .error = -EINVAL,
d8d1f30b 333 .input = dst_discard,
ede2059d 334 .output = dst_discard_out,
101367c2
TG
335 },
336 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
337};
338
339#endif
340
ebfa45f0
MKL
341static void rt6_info_init(struct rt6_info *rt)
342{
343 struct dst_entry *dst = &rt->dst;
344
345 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
ebfa45f0
MKL
346 INIT_LIST_HEAD(&rt->rt6i_uncached);
347}
348
1da177e4 349/* allocate dst with ip6_dst_ops */
93531c67
DA
350struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
351 int flags)
1da177e4 352{
97bab73f 353 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 354 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 355
81eb8447 356 if (rt) {
ebfa45f0 357 rt6_info_init(rt);
81eb8447
WW
358 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
359 }
8104891b 360
cf911662 361 return rt;
1da177e4 362}
9ab179d8 363EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 364
1da177e4
LT
365static void ip6_dst_destroy(struct dst_entry *dst)
366{
ce7ea4af 367 struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
1da177e4 368 struct rt6_info *rt = (struct rt6_info *)dst;
a68886a6 369 struct fib6_info *from;
8d0b94af 370 struct inet6_dev *idev;
1da177e4 371
ce7ea4af
WW
372 if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
373 kfree(p);
374
8d0b94af
MKL
375 rt6_uncached_list_del(rt);
376
377 idev = rt->rt6i_idev;
38308473 378 if (idev) {
1da177e4
LT
379 rt->rt6i_idev = NULL;
380 in6_dev_put(idev);
1ab1457c 381 }
1716a961 382
a68886a6
DA
383 rcu_read_lock();
384 from = rcu_dereference(rt->from);
385 rcu_assign_pointer(rt->from, NULL);
93531c67 386 fib6_info_release(from);
a68886a6 387 rcu_read_unlock();
b3419363
DM
388}
389
1da177e4
LT
390static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
391 int how)
392{
393 struct rt6_info *rt = (struct rt6_info *)dst;
394 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 395 struct net_device *loopback_dev =
c346dca1 396 dev_net(dev)->loopback_dev;
1da177e4 397
e5645f51
WW
398 if (idev && idev->dev != loopback_dev) {
399 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
400 if (loopback_idev) {
401 rt->rt6i_idev = loopback_idev;
402 in6_dev_put(idev);
97cac082 403 }
1da177e4
LT
404 }
405}
406
5973fb1e
MKL
407static bool __rt6_check_expired(const struct rt6_info *rt)
408{
409 if (rt->rt6i_flags & RTF_EXPIRES)
410 return time_after(jiffies, rt->dst.expires);
411 else
412 return false;
413}
414
a50feda5 415static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 416{
a68886a6
DA
417 struct fib6_info *from;
418
419 from = rcu_dereference(rt->from);
420
1716a961
G
421 if (rt->rt6i_flags & RTF_EXPIRES) {
422 if (time_after(jiffies, rt->dst.expires))
a50feda5 423 return true;
a68886a6 424 } else if (from) {
1e2ea8ad 425 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
a68886a6 426 fib6_check_expired(from);
1716a961 427 }
a50feda5 428 return false;
1da177e4
LT
429}
430
3b290a31
DA
431struct fib6_info *fib6_multipath_select(const struct net *net,
432 struct fib6_info *match,
433 struct flowi6 *fl6, int oif,
434 const struct sk_buff *skb,
435 int strict)
51ebd318 436{
8d1c802b 437 struct fib6_info *sibling, *next_sibling;
51ebd318 438
b673d6cc
JS
439 /* We might have already computed the hash for ICMPv6 errors. In such
440 * case it will always be non-zero. Otherwise now is the time to do it.
441 */
442 if (!fl6->mp_hash)
b4bac172 443 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
b673d6cc 444
5e670d84 445 if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
3d709f69
IS
446 return match;
447
93c2fb25
DA
448 list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
449 fib6_siblings) {
5e670d84
DA
450 int nh_upper_bound;
451
452 nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
453 if (fl6->mp_hash > nh_upper_bound)
3d709f69
IS
454 continue;
455 if (rt6_score_route(sibling, oif, strict) < 0)
456 break;
457 match = sibling;
458 break;
459 }
460
51ebd318
ND
461 return match;
462}
463
1da177e4 464/*
66f5d6ce 465 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
466 */
467
8d1c802b
DA
468static inline struct fib6_info *rt6_device_match(struct net *net,
469 struct fib6_info *rt,
b71d1d42 470 const struct in6_addr *saddr,
1da177e4 471 int oif,
d420895e 472 int flags)
1da177e4 473{
8d1c802b 474 struct fib6_info *sprt;
1da177e4 475
5e670d84
DA
476 if (!oif && ipv6_addr_any(saddr) &&
477 !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
8067bb8c 478 return rt;
dd3abc4e 479
8fb11a9a 480 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
5e670d84 481 const struct net_device *dev = sprt->fib6_nh.nh_dev;
dd3abc4e 482
5e670d84 483 if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
484 continue;
485
dd3abc4e 486 if (oif) {
1da177e4
LT
487 if (dev->ifindex == oif)
488 return sprt;
dd3abc4e
YH
489 } else {
490 if (ipv6_chk_addr(net, saddr, dev,
491 flags & RT6_LOOKUP_F_IFACE))
492 return sprt;
1da177e4 493 }
dd3abc4e 494 }
1da177e4 495
eea68cd3
DA
496 if (oif && flags & RT6_LOOKUP_F_IFACE)
497 return net->ipv6.fib6_null_entry;
8067bb8c 498
421842ed 499 return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
1da177e4
LT
500}
501
27097255 502#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
503struct __rt6_probe_work {
504 struct work_struct work;
505 struct in6_addr target;
506 struct net_device *dev;
507};
508
509static void rt6_probe_deferred(struct work_struct *w)
510{
511 struct in6_addr mcaddr;
512 struct __rt6_probe_work *work =
513 container_of(w, struct __rt6_probe_work, work);
514
515 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 516 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 517 dev_put(work->dev);
662f5533 518 kfree(work);
c2f17e82
HFS
519}
520
8d1c802b 521static void rt6_probe(struct fib6_info *rt)
27097255 522{
990edb42 523 struct __rt6_probe_work *work;
5e670d84 524 const struct in6_addr *nh_gw;
f2c31e32 525 struct neighbour *neigh;
5e670d84
DA
526 struct net_device *dev;
527
27097255
YH
528 /*
529 * Okay, this does not seem to be appropriate
530 * for now, however, we need to check if it
531 * is really so; aka Router Reachability Probing.
532 *
533 * Router Reachability Probe MUST be rate-limited
534 * to no more than one per minute.
535 */
93c2fb25 536 if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
7ff74a59 537 return;
5e670d84
DA
538
539 nh_gw = &rt->fib6_nh.nh_gw;
540 dev = rt->fib6_nh.nh_dev;
2152caea 541 rcu_read_lock_bh();
5e670d84 542 neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
2152caea 543 if (neigh) {
dcd1f572
DA
544 struct inet6_dev *idev;
545
8d6c31bf
MKL
546 if (neigh->nud_state & NUD_VALID)
547 goto out;
548
dcd1f572 549 idev = __in6_dev_get(dev);
990edb42 550 work = NULL;
2152caea 551 write_lock(&neigh->lock);
990edb42
MKL
552 if (!(neigh->nud_state & NUD_VALID) &&
553 time_after(jiffies,
dcd1f572 554 neigh->updated + idev->cnf.rtr_probe_interval)) {
990edb42
MKL
555 work = kmalloc(sizeof(*work), GFP_ATOMIC);
556 if (work)
557 __neigh_set_probe_once(neigh);
c2f17e82 558 }
2152caea 559 write_unlock(&neigh->lock);
990edb42
MKL
560 } else {
561 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 562 }
990edb42
MKL
563
564 if (work) {
565 INIT_WORK(&work->work, rt6_probe_deferred);
5e670d84
DA
566 work->target = *nh_gw;
567 dev_hold(dev);
568 work->dev = dev;
990edb42
MKL
569 schedule_work(&work->work);
570 }
571
8d6c31bf 572out:
2152caea 573 rcu_read_unlock_bh();
27097255
YH
574}
575#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct fib6_info *rt)
{
	/* no-op */
}
579#endif
580
1da177e4 581/*
554cfb7e 582 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 583 */
8d1c802b 584static inline int rt6_check_dev(struct fib6_info *rt, int oif)
554cfb7e 585{
5e670d84
DA
586 const struct net_device *dev = rt->fib6_nh.nh_dev;
587
161980f4 588 if (!oif || dev->ifindex == oif)
554cfb7e 589 return 2;
161980f4 590 return 0;
554cfb7e 591}
1da177e4 592
8d1c802b 593static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
1da177e4 594{
afc154e9 595 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
5e670d84 596 struct neighbour *neigh;
f2c31e32 597
93c2fb25
DA
598 if (rt->fib6_flags & RTF_NONEXTHOP ||
599 !(rt->fib6_flags & RTF_GATEWAY))
afc154e9 600 return RT6_NUD_SUCCEED;
145a3621
YH
601
602 rcu_read_lock_bh();
5e670d84
DA
603 neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
604 &rt->fib6_nh.nh_gw);
145a3621
YH
605 if (neigh) {
606 read_lock(&neigh->lock);
554cfb7e 607 if (neigh->nud_state & NUD_VALID)
afc154e9 608 ret = RT6_NUD_SUCCEED;
398bcbeb 609#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 610 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 611 ret = RT6_NUD_SUCCEED;
7e980569
JB
612 else
613 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 614#endif
145a3621 615 read_unlock(&neigh->lock);
afc154e9
HFS
616 } else {
617 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 618 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 619 }
145a3621
YH
620 rcu_read_unlock_bh();
621
a5a81f0b 622 return ret;
1da177e4
LT
623}
624
8d1c802b 625static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
1da177e4 626{
a5a81f0b 627 int m;
1ab1457c 628
4d0c5911 629 m = rt6_check_dev(rt, oif);
77d16f45 630 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 631 return RT6_NUD_FAIL_HARD;
ebacaaa0 632#ifdef CONFIG_IPV6_ROUTER_PREF
93c2fb25 633 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
ebacaaa0 634#endif
afc154e9
HFS
635 if (strict & RT6_LOOKUP_F_REACHABLE) {
636 int n = rt6_check_neigh(rt);
637 if (n < 0)
638 return n;
639 }
554cfb7e
YH
640 return m;
641}
642
dcd1f572
DA
643/* called with rc_read_lock held */
644static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i)
645{
646 const struct net_device *dev = fib6_info_nh_dev(f6i);
647 bool rc = false;
648
649 if (dev) {
650 const struct inet6_dev *idev = __in6_dev_get(dev);
651
652 rc = !!idev->cnf.ignore_routes_with_linkdown;
653 }
654
655 return rc;
656}
657
8d1c802b
DA
658static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
659 int *mpri, struct fib6_info *match,
afc154e9 660 bool *do_rr)
554cfb7e 661{
f11e6659 662 int m;
afc154e9 663 bool match_do_rr = false;
35103d11 664
5e670d84 665 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
666 goto out;
667
dcd1f572 668 if (fib6_ignore_linkdown(rt) &&
5e670d84 669 rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 670 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 671 goto out;
f11e6659 672
14895687 673 if (fib6_check_expired(rt))
f11e6659
DM
674 goto out;
675
676 m = rt6_score_route(rt, oif, strict);
7e980569 677 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
678 match_do_rr = true;
679 m = 0; /* lowest valid score */
7e980569 680 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 681 goto out;
afc154e9
HFS
682 }
683
684 if (strict & RT6_LOOKUP_F_REACHABLE)
685 rt6_probe(rt);
f11e6659 686
7e980569 687 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 688 if (m > *mpri) {
afc154e9 689 *do_rr = match_do_rr;
f11e6659
DM
690 *mpri = m;
691 match = rt;
f11e6659 692 }
f11e6659
DM
693out:
694 return match;
695}
696
8d1c802b
DA
697static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
698 struct fib6_info *leaf,
699 struct fib6_info *rr_head,
afc154e9
HFS
700 u32 metric, int oif, int strict,
701 bool *do_rr)
f11e6659 702{
8d1c802b 703 struct fib6_info *rt, *match, *cont;
554cfb7e 704 int mpri = -1;
1da177e4 705
f11e6659 706 match = NULL;
9fbdcfaf 707 cont = NULL;
8fb11a9a 708 for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
93c2fb25 709 if (rt->fib6_metric != metric) {
9fbdcfaf
SK
710 cont = rt;
711 break;
712 }
713
714 match = find_match(rt, oif, strict, &mpri, match, do_rr);
715 }
716
66f5d6ce 717 for (rt = leaf; rt && rt != rr_head;
8fb11a9a 718 rt = rcu_dereference(rt->fib6_next)) {
93c2fb25 719 if (rt->fib6_metric != metric) {
9fbdcfaf
SK
720 cont = rt;
721 break;
722 }
723
afc154e9 724 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
725 }
726
727 if (match || !cont)
728 return match;
729
8fb11a9a 730 for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
afc154e9 731 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 732
f11e6659
DM
733 return match;
734}
1da177e4 735
8d1c802b 736static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
8d1040e8 737 int oif, int strict)
f11e6659 738{
8d1c802b
DA
739 struct fib6_info *leaf = rcu_dereference(fn->leaf);
740 struct fib6_info *match, *rt0;
afc154e9 741 bool do_rr = false;
17ecf590 742 int key_plen;
1da177e4 743
421842ed
DA
744 if (!leaf || leaf == net->ipv6.fib6_null_entry)
745 return net->ipv6.fib6_null_entry;
8d1040e8 746
66f5d6ce 747 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 748 if (!rt0)
66f5d6ce 749 rt0 = leaf;
1da177e4 750
17ecf590
WW
751 /* Double check to make sure fn is not an intermediate node
752 * and fn->leaf does not points to its child's leaf
753 * (This might happen if all routes under fn are deleted from
754 * the tree and fib6_repair_tree() is called on the node.)
755 */
93c2fb25 756 key_plen = rt0->fib6_dst.plen;
17ecf590 757#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
758 if (rt0->fib6_src.plen)
759 key_plen = rt0->fib6_src.plen;
17ecf590
WW
760#endif
761 if (fn->fn_bit != key_plen)
421842ed 762 return net->ipv6.fib6_null_entry;
17ecf590 763
93c2fb25 764 match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
afc154e9 765 &do_rr);
1da177e4 766
afc154e9 767 if (do_rr) {
8fb11a9a 768 struct fib6_info *next = rcu_dereference(rt0->fib6_next);
f11e6659 769
554cfb7e 770 /* no entries matched; do round-robin */
93c2fb25 771 if (!next || next->fib6_metric != rt0->fib6_metric)
8d1040e8 772 next = leaf;
f11e6659 773
66f5d6ce 774 if (next != rt0) {
93c2fb25 775 spin_lock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 776 /* make sure next is not being deleted from the tree */
93c2fb25 777 if (next->fib6_node)
66f5d6ce 778 rcu_assign_pointer(fn->rr_ptr, next);
93c2fb25 779 spin_unlock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 780 }
1da177e4 781 }
1da177e4 782
421842ed 783 return match ? match : net->ipv6.fib6_null_entry;
1da177e4
LT
784}
785
8d1c802b 786static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
8b9df265 787{
93c2fb25 788 return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
8b9df265
MKL
789}
790
70ceb4f5
YH
791#ifdef CONFIG_IPV6_ROUTE_INFO
792int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 793 const struct in6_addr *gwaddr)
70ceb4f5 794{
c346dca1 795 struct net *net = dev_net(dev);
70ceb4f5
YH
796 struct route_info *rinfo = (struct route_info *) opt;
797 struct in6_addr prefix_buf, *prefix;
798 unsigned int pref;
4bed72e4 799 unsigned long lifetime;
8d1c802b 800 struct fib6_info *rt;
70ceb4f5
YH
801
802 if (len < sizeof(struct route_info)) {
803 return -EINVAL;
804 }
805
806 /* Sanity check for prefix_len and length */
807 if (rinfo->length > 3) {
808 return -EINVAL;
809 } else if (rinfo->prefix_len > 128) {
810 return -EINVAL;
811 } else if (rinfo->prefix_len > 64) {
812 if (rinfo->length < 2) {
813 return -EINVAL;
814 }
815 } else if (rinfo->prefix_len > 0) {
816 if (rinfo->length < 1) {
817 return -EINVAL;
818 }
819 }
820
821 pref = rinfo->route_pref;
822 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 823 return -EINVAL;
70ceb4f5 824
4bed72e4 825 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
826
827 if (rinfo->length == 3)
828 prefix = (struct in6_addr *)rinfo->prefix;
829 else {
830 /* this function is safe */
831 ipv6_addr_prefix(&prefix_buf,
832 (struct in6_addr *)rinfo->prefix,
833 rinfo->prefix_len);
834 prefix = &prefix_buf;
835 }
836
f104a567 837 if (rinfo->prefix_len == 0)
afb1d4b5 838 rt = rt6_get_dflt_router(net, gwaddr, dev);
f104a567
DJ
839 else
840 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 841 gwaddr, dev);
70ceb4f5
YH
842
843 if (rt && !lifetime) {
afb1d4b5 844 ip6_del_rt(net, rt);
70ceb4f5
YH
845 rt = NULL;
846 }
847
848 if (!rt && lifetime)
830218c1
DA
849 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
850 dev, pref);
70ceb4f5 851 else if (rt)
93c2fb25
DA
852 rt->fib6_flags = RTF_ROUTEINFO |
853 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
70ceb4f5
YH
854
855 if (rt) {
1716a961 856 if (!addrconf_finite_timeout(lifetime))
14895687 857 fib6_clean_expires(rt);
1716a961 858 else
14895687 859 fib6_set_expires(rt, jiffies + HZ * lifetime);
1716a961 860
93531c67 861 fib6_info_release(rt);
70ceb4f5
YH
862 }
863 return 0;
864}
865#endif
866
ae90d867
DA
867/*
868 * Misc support functions
869 */
870
871/* called with rcu_lock held */
8d1c802b 872static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
ae90d867 873{
5e670d84 874 struct net_device *dev = rt->fib6_nh.nh_dev;
ae90d867 875
93c2fb25 876 if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
ae90d867
DA
877 /* for copies of local routes, dst->dev needs to be the
878 * device if it is a master device, the master device if
879 * device is enslaved, and the loopback as the default
880 */
881 if (netif_is_l3_slave(dev) &&
93c2fb25 882 !rt6_need_strict(&rt->fib6_dst.addr))
ae90d867
DA
883 dev = l3mdev_master_dev_rcu(dev);
884 else if (!netif_is_l3_master(dev))
885 dev = dev_net(dev)->loopback_dev;
886 /* last case is netif_is_l3_master(dev) is true in which
887 * case we want dev returned to be dev
888 */
889 }
890
891 return dev;
892}
893
6edb3c96
DA
894static const int fib6_prop[RTN_MAX + 1] = {
895 [RTN_UNSPEC] = 0,
896 [RTN_UNICAST] = 0,
897 [RTN_LOCAL] = 0,
898 [RTN_BROADCAST] = 0,
899 [RTN_ANYCAST] = 0,
900 [RTN_MULTICAST] = 0,
901 [RTN_BLACKHOLE] = -EINVAL,
902 [RTN_UNREACHABLE] = -EHOSTUNREACH,
903 [RTN_PROHIBIT] = -EACCES,
904 [RTN_THROW] = -EAGAIN,
905 [RTN_NAT] = -EINVAL,
906 [RTN_XRESOLVE] = -EINVAL,
907};
908
909static int ip6_rt_type_to_error(u8 fib6_type)
910{
911 return fib6_prop[fib6_type];
912}
913
8d1c802b 914static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
3b6761d1
DA
915{
916 unsigned short flags = 0;
917
918 if (rt->dst_nocount)
919 flags |= DST_NOCOUNT;
920 if (rt->dst_nopolicy)
921 flags |= DST_NOPOLICY;
922 if (rt->dst_host)
923 flags |= DST_HOST;
924
925 return flags;
926}
927
8d1c802b 928static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96
DA
929{
930 rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
931
932 switch (ort->fib6_type) {
933 case RTN_BLACKHOLE:
934 rt->dst.output = dst_discard_out;
935 rt->dst.input = dst_discard;
936 break;
937 case RTN_PROHIBIT:
938 rt->dst.output = ip6_pkt_prohibit_out;
939 rt->dst.input = ip6_pkt_prohibit;
940 break;
941 case RTN_THROW:
942 case RTN_UNREACHABLE:
943 default:
944 rt->dst.output = ip6_pkt_discard_out;
945 rt->dst.input = ip6_pkt_discard;
946 break;
947 }
948}
949
8d1c802b 950static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96 951{
93c2fb25 952 if (ort->fib6_flags & RTF_REJECT) {
6edb3c96
DA
953 ip6_rt_init_dst_reject(rt, ort);
954 return;
955 }
956
957 rt->dst.error = 0;
958 rt->dst.output = ip6_output;
959
d23c4b63 960 if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) {
6edb3c96 961 rt->dst.input = ip6_input;
93c2fb25 962 } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
6edb3c96
DA
963 rt->dst.input = ip6_mc_input;
964 } else {
965 rt->dst.input = ip6_forward;
966 }
967
968 if (ort->fib6_nh.nh_lwtstate) {
969 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
970 lwtunnel_set_redirect(&rt->dst);
971 }
972
973 rt->dst.lastuse = jiffies;
974}
975
e873e4b9 976/* Caller must already hold reference to @from */
8d1c802b 977static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
ae90d867 978{
ae90d867 979 rt->rt6i_flags &= ~RTF_EXPIRES;
a68886a6 980 rcu_assign_pointer(rt->from, from);
d4ead6b3 981 dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
86758605
WW
982 if (from->fib6_metrics != &dst_default_metrics) {
983 rt->dst._metrics |= DST_METRICS_REFCOUNTED;
984 refcount_inc(&from->fib6_metrics->refcnt);
985 }
ae90d867
DA
986}
987
e873e4b9 988/* Caller must already hold reference to @ort */
8d1c802b 989static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
ae90d867 990{
dcd1f572
DA
991 struct net_device *dev = fib6_info_nh_dev(ort);
992
6edb3c96
DA
993 ip6_rt_init_dst(rt, ort);
994
93c2fb25 995 rt->rt6i_dst = ort->fib6_dst;
dcd1f572 996 rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
5e670d84 997 rt->rt6i_gateway = ort->fib6_nh.nh_gw;
93c2fb25 998 rt->rt6i_flags = ort->fib6_flags;
ae90d867 999 rt6_set_from(rt, ort);
ae90d867 1000#ifdef CONFIG_IPV6_SUBTREES
93c2fb25 1001 rt->rt6i_src = ort->fib6_src;
ae90d867 1002#endif
ae90d867
DA
1003}
1004
a3c00e46
MKL
1005static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1006 struct in6_addr *saddr)
1007{
66f5d6ce 1008 struct fib6_node *pn, *sn;
a3c00e46
MKL
1009 while (1) {
1010 if (fn->fn_flags & RTN_TL_ROOT)
1011 return NULL;
66f5d6ce
WW
1012 pn = rcu_dereference(fn->parent);
1013 sn = FIB6_SUBTREE(pn);
1014 if (sn && sn != fn)
6454743b 1015 fn = fib6_node_lookup(sn, NULL, saddr);
a3c00e46
MKL
1016 else
1017 fn = pn;
1018 if (fn->fn_flags & RTN_RTINFO)
1019 return fn;
1020 }
1021}
c71099ac 1022
d3843fe5
WW
1023static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
1024 bool null_fallback)
1025{
1026 struct rt6_info *rt = *prt;
1027
1028 if (dst_hold_safe(&rt->dst))
1029 return true;
1030 if (null_fallback) {
1031 rt = net->ipv6.ip6_null_entry;
1032 dst_hold(&rt->dst);
1033 } else {
1034 rt = NULL;
1035 }
1036 *prt = rt;
1037 return false;
1038}
1039
dec9b0e2 1040/* called with rcu_lock held */
8d1c802b 1041static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
dec9b0e2 1042{
3b6761d1 1043 unsigned short flags = fib6_info_dst_flags(rt);
dec9b0e2
DA
1044 struct net_device *dev = rt->fib6_nh.nh_dev;
1045 struct rt6_info *nrt;
1046
e873e4b9
WW
1047 if (!fib6_info_hold_safe(rt))
1048 return NULL;
1049
93531c67 1050 nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
dec9b0e2
DA
1051 if (nrt)
1052 ip6_rt_copy_init(nrt, rt);
e873e4b9
WW
1053 else
1054 fib6_info_release(rt);
dec9b0e2
DA
1055
1056 return nrt;
1057}
1058
8ed67789
DL
1059static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1060 struct fib6_table *table,
b75cc8f9
DA
1061 struct flowi6 *fl6,
1062 const struct sk_buff *skb,
1063 int flags)
1da177e4 1064{
8d1c802b 1065 struct fib6_info *f6i;
1da177e4 1066 struct fib6_node *fn;
23fb93a4 1067 struct rt6_info *rt;
1da177e4 1068
b6cdbc85
DA
1069 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1070 flags &= ~RT6_LOOKUP_F_IFACE;
1071
66f5d6ce 1072 rcu_read_lock();
6454743b 1073 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 1074restart:
23fb93a4
DA
1075 f6i = rcu_dereference(fn->leaf);
1076 if (!f6i) {
1077 f6i = net->ipv6.fib6_null_entry;
66f5d6ce 1078 } else {
23fb93a4 1079 f6i = rt6_device_match(net, f6i, &fl6->saddr,
66f5d6ce 1080 fl6->flowi6_oif, flags);
93c2fb25 1081 if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
3b290a31
DA
1082 f6i = fib6_multipath_select(net, f6i, fl6,
1083 fl6->flowi6_oif, skb,
1084 flags);
66f5d6ce 1085 }
23fb93a4 1086 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1087 fn = fib6_backtrack(fn, &fl6->saddr);
1088 if (fn)
1089 goto restart;
1090 }
2b760fcf 1091
d4bea421 1092 trace_fib6_table_lookup(net, f6i, table, fl6);
d3843fe5 1093
2b760fcf 1094 /* Search through exception table */
23fb93a4
DA
1095 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1096 if (rt) {
dec9b0e2
DA
1097 if (ip6_hold_safe(net, &rt, true))
1098 dst_use_noref(&rt->dst, jiffies);
23fb93a4 1099 } else if (f6i == net->ipv6.fib6_null_entry) {
dec9b0e2
DA
1100 rt = net->ipv6.ip6_null_entry;
1101 dst_hold(&rt->dst);
23fb93a4
DA
1102 } else {
1103 rt = ip6_create_rt_rcu(f6i);
1104 if (!rt) {
1105 rt = net->ipv6.ip6_null_entry;
1106 dst_hold(&rt->dst);
1107 }
dec9b0e2 1108 }
b811580d 1109
66f5d6ce 1110 rcu_read_unlock();
b811580d 1111
c71099ac 1112 return rt;
c71099ac
TG
1113}
1114
67ba4152 1115struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 1116 const struct sk_buff *skb, int flags)
ea6e574e 1117{
b75cc8f9 1118 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
ea6e574e
FW
1119}
1120EXPORT_SYMBOL_GPL(ip6_route_lookup);
1121
9acd9f3a 1122struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
b75cc8f9
DA
1123 const struct in6_addr *saddr, int oif,
1124 const struct sk_buff *skb, int strict)
c71099ac 1125{
4c9483b2
DM
1126 struct flowi6 fl6 = {
1127 .flowi6_oif = oif,
1128 .daddr = *daddr,
c71099ac
TG
1129 };
1130 struct dst_entry *dst;
77d16f45 1131 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 1132
adaa70bb 1133 if (saddr) {
4c9483b2 1134 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
1135 flags |= RT6_LOOKUP_F_HAS_SADDR;
1136 }
1137
b75cc8f9 1138 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
c71099ac
TG
1139 if (dst->error == 0)
1140 return (struct rt6_info *) dst;
1141
1142 dst_release(dst);
1143
1da177e4
LT
1144 return NULL;
1145}
7159039a
YH
1146EXPORT_SYMBOL(rt6_lookup);
1147
c71099ac 1148/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
1149 * It takes new route entry, the addition fails by any reason the
1150 * route is released.
1151 * Caller must hold dst before calling it.
1da177e4
LT
1152 */
1153
8d1c802b 1154static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
333c4301 1155 struct netlink_ext_ack *extack)
1da177e4
LT
1156{
1157 int err;
c71099ac 1158 struct fib6_table *table;
1da177e4 1159
93c2fb25 1160 table = rt->fib6_table;
66f5d6ce 1161 spin_lock_bh(&table->tb6_lock);
d4ead6b3 1162 err = fib6_add(&table->tb6_root, rt, info, extack);
66f5d6ce 1163 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1164
1165 return err;
1166}
1167
8d1c802b 1168int ip6_ins_rt(struct net *net, struct fib6_info *rt)
40e22e8f 1169{
afb1d4b5 1170 struct nl_info info = { .nl_net = net, };
e715b6d3 1171
d4ead6b3 1172 return __ip6_ins_rt(rt, &info, NULL);
40e22e8f
TG
1173}
1174
8d1c802b 1175static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
8b9df265
MKL
1176 const struct in6_addr *daddr,
1177 const struct in6_addr *saddr)
1da177e4 1178{
4832c30d 1179 struct net_device *dev;
1da177e4
LT
1180 struct rt6_info *rt;
1181
1182 /*
1183 * Clone the route.
1184 */
1185
e873e4b9
WW
1186 if (!fib6_info_hold_safe(ort))
1187 return NULL;
1188
4832c30d 1189 dev = ip6_rt_get_dev_rcu(ort);
93531c67 1190 rt = ip6_dst_alloc(dev_net(dev), dev, 0);
e873e4b9
WW
1191 if (!rt) {
1192 fib6_info_release(ort);
83a09abd 1193 return NULL;
e873e4b9 1194 }
83a09abd
MKL
1195
1196 ip6_rt_copy_init(rt, ort);
1197 rt->rt6i_flags |= RTF_CACHE;
83a09abd
MKL
1198 rt->dst.flags |= DST_HOST;
1199 rt->rt6i_dst.addr = *daddr;
1200 rt->rt6i_dst.plen = 128;
1da177e4 1201
83a09abd 1202 if (!rt6_is_gw_or_nonexthop(ort)) {
93c2fb25
DA
1203 if (ort->fib6_dst.plen != 128 &&
1204 ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
83a09abd 1205 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1206#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1207 if (rt->rt6i_src.plen && saddr) {
1208 rt->rt6i_src.addr = *saddr;
1209 rt->rt6i_src.plen = 128;
8b9df265 1210 }
83a09abd 1211#endif
95a9a5ba 1212 }
1da177e4 1213
95a9a5ba
YH
1214 return rt;
1215}
1da177e4 1216
8d1c802b 1217static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
d52d3997 1218{
3b6761d1 1219 unsigned short flags = fib6_info_dst_flags(rt);
4832c30d 1220 struct net_device *dev;
d52d3997
MKL
1221 struct rt6_info *pcpu_rt;
1222
e873e4b9
WW
1223 if (!fib6_info_hold_safe(rt))
1224 return NULL;
1225
4832c30d
DA
1226 rcu_read_lock();
1227 dev = ip6_rt_get_dev_rcu(rt);
93531c67 1228 pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
4832c30d 1229 rcu_read_unlock();
e873e4b9
WW
1230 if (!pcpu_rt) {
1231 fib6_info_release(rt);
d52d3997 1232 return NULL;
e873e4b9 1233 }
d52d3997 1234 ip6_rt_copy_init(pcpu_rt, rt);
d52d3997
MKL
1235 pcpu_rt->rt6i_flags |= RTF_PCPU;
1236 return pcpu_rt;
1237}
1238
66f5d6ce 1239/* It should be called with rcu_read_lock() acquired */
8d1c802b 1240static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
d52d3997 1241{
a73e4195 1242 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1243
1244 p = this_cpu_ptr(rt->rt6i_pcpu);
1245 pcpu_rt = *p;
1246
d4ead6b3
DA
1247 if (pcpu_rt)
1248 ip6_hold_safe(NULL, &pcpu_rt, false);
d3843fe5 1249
a73e4195
MKL
1250 return pcpu_rt;
1251}
1252
afb1d4b5 1253static struct rt6_info *rt6_make_pcpu_route(struct net *net,
8d1c802b 1254 struct fib6_info *rt)
a73e4195
MKL
1255{
1256 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1257
1258 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1259 if (!pcpu_rt) {
9c7370a1
MKL
1260 dst_hold(&net->ipv6.ip6_null_entry->dst);
1261 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1262 }
1263
a94b9367
WW
1264 dst_hold(&pcpu_rt->dst);
1265 p = this_cpu_ptr(rt->rt6i_pcpu);
1266 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1267 BUG_ON(prev);
a94b9367 1268
d52d3997
MKL
1269 return pcpu_rt;
1270}
1271
35732d01
WW
1272/* exception hash table implementation
1273 */
1274static DEFINE_SPINLOCK(rt6_exception_lock);
1275
1276/* Remove rt6_ex from hash table and free the memory
1277 * Caller must hold rt6_exception_lock
1278 */
1279static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1280 struct rt6_exception *rt6_ex)
1281{
b2427e67 1282 struct net *net;
81eb8447 1283
35732d01
WW
1284 if (!bucket || !rt6_ex)
1285 return;
b2427e67
CIK
1286
1287 net = dev_net(rt6_ex->rt6i->dst.dev);
35732d01 1288 hlist_del_rcu(&rt6_ex->hlist);
77634cc6 1289 dst_release(&rt6_ex->rt6i->dst);
35732d01
WW
1290 kfree_rcu(rt6_ex, rcu);
1291 WARN_ON_ONCE(!bucket->depth);
1292 bucket->depth--;
81eb8447 1293 net->ipv6.rt6_stats->fib_rt_cache--;
35732d01
WW
1294}
1295
1296/* Remove oldest rt6_ex in bucket and free the memory
1297 * Caller must hold rt6_exception_lock
1298 */
1299static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1300{
1301 struct rt6_exception *rt6_ex, *oldest = NULL;
1302
1303 if (!bucket)
1304 return;
1305
1306 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1307 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1308 oldest = rt6_ex;
1309 }
1310 rt6_remove_exception(bucket, oldest);
1311}
1312
1313static u32 rt6_exception_hash(const struct in6_addr *dst,
1314 const struct in6_addr *src)
1315{
1316 static u32 seed __read_mostly;
1317 u32 val;
1318
1319 net_get_random_once(&seed, sizeof(seed));
1320 val = jhash(dst, sizeof(*dst), seed);
1321
1322#ifdef CONFIG_IPV6_SUBTREES
1323 if (src)
1324 val = jhash(src, sizeof(*src), val);
1325#endif
1326 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1327}
1328
1329/* Helper function to find the cached rt in the hash table
1330 * and update bucket pointer to point to the bucket for this
1331 * (daddr, saddr) pair
1332 * Caller must hold rt6_exception_lock
1333 */
1334static struct rt6_exception *
1335__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1336 const struct in6_addr *daddr,
1337 const struct in6_addr *saddr)
1338{
1339 struct rt6_exception *rt6_ex;
1340 u32 hval;
1341
1342 if (!(*bucket) || !daddr)
1343 return NULL;
1344
1345 hval = rt6_exception_hash(daddr, saddr);
1346 *bucket += hval;
1347
1348 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1349 struct rt6_info *rt6 = rt6_ex->rt6i;
1350 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1351
1352#ifdef CONFIG_IPV6_SUBTREES
1353 if (matched && saddr)
1354 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1355#endif
1356 if (matched)
1357 return rt6_ex;
1358 }
1359 return NULL;
1360}
1361
1362/* Helper function to find the cached rt in the hash table
1363 * and update bucket pointer to point to the bucket for this
1364 * (daddr, saddr) pair
1365 * Caller must hold rcu_read_lock()
1366 */
1367static struct rt6_exception *
1368__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1369 const struct in6_addr *daddr,
1370 const struct in6_addr *saddr)
1371{
1372 struct rt6_exception *rt6_ex;
1373 u32 hval;
1374
1375 WARN_ON_ONCE(!rcu_read_lock_held());
1376
1377 if (!(*bucket) || !daddr)
1378 return NULL;
1379
1380 hval = rt6_exception_hash(daddr, saddr);
1381 *bucket += hval;
1382
1383 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1384 struct rt6_info *rt6 = rt6_ex->rt6i;
1385 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1386
1387#ifdef CONFIG_IPV6_SUBTREES
1388 if (matched && saddr)
1389 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1390#endif
1391 if (matched)
1392 return rt6_ex;
1393 }
1394 return NULL;
1395}
1396
8d1c802b 1397static unsigned int fib6_mtu(const struct fib6_info *rt)
d4ead6b3
DA
1398{
1399 unsigned int mtu;
1400
dcd1f572
DA
1401 if (rt->fib6_pmtu) {
1402 mtu = rt->fib6_pmtu;
1403 } else {
1404 struct net_device *dev = fib6_info_nh_dev(rt);
1405 struct inet6_dev *idev;
1406
1407 rcu_read_lock();
1408 idev = __in6_dev_get(dev);
1409 mtu = idev->cnf.mtu6;
1410 rcu_read_unlock();
1411 }
1412
d4ead6b3
DA
1413 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1414
1415 return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1416}
1417
35732d01 1418static int rt6_insert_exception(struct rt6_info *nrt,
8d1c802b 1419 struct fib6_info *ort)
35732d01 1420{
5e670d84 1421 struct net *net = dev_net(nrt->dst.dev);
35732d01
WW
1422 struct rt6_exception_bucket *bucket;
1423 struct in6_addr *src_key = NULL;
1424 struct rt6_exception *rt6_ex;
1425 int err = 0;
1426
35732d01
WW
1427 spin_lock_bh(&rt6_exception_lock);
1428
1429 if (ort->exception_bucket_flushed) {
1430 err = -EINVAL;
1431 goto out;
1432 }
1433
1434 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1435 lockdep_is_held(&rt6_exception_lock));
1436 if (!bucket) {
1437 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1438 GFP_ATOMIC);
1439 if (!bucket) {
1440 err = -ENOMEM;
1441 goto out;
1442 }
1443 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1444 }
1445
1446#ifdef CONFIG_IPV6_SUBTREES
1447 /* rt6i_src.plen != 0 indicates ort is in subtree
1448 * and exception table is indexed by a hash of
1449 * both rt6i_dst and rt6i_src.
1450 * Otherwise, the exception table is indexed by
1451 * a hash of only rt6i_dst.
1452 */
93c2fb25 1453 if (ort->fib6_src.plen)
35732d01
WW
1454 src_key = &nrt->rt6i_src.addr;
1455#endif
f5bbe7ee
WW
1456 /* rt6_mtu_change() might lower mtu on ort.
1457 * Only insert this exception route if its mtu
1458 * is less than ort's mtu value.
1459 */
d4ead6b3 1460 if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
f5bbe7ee
WW
1461 err = -EINVAL;
1462 goto out;
1463 }
60006a48 1464
35732d01
WW
1465 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1466 src_key);
1467 if (rt6_ex)
1468 rt6_remove_exception(bucket, rt6_ex);
1469
1470 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1471 if (!rt6_ex) {
1472 err = -ENOMEM;
1473 goto out;
1474 }
1475 rt6_ex->rt6i = nrt;
1476 rt6_ex->stamp = jiffies;
35732d01
WW
1477 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1478 bucket->depth++;
81eb8447 1479 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1480
1481 if (bucket->depth > FIB6_MAX_DEPTH)
1482 rt6_exception_remove_oldest(bucket);
1483
1484out:
1485 spin_unlock_bh(&rt6_exception_lock);
1486
1487 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1488 if (!err) {
93c2fb25 1489 spin_lock_bh(&ort->fib6_table->tb6_lock);
7aef6859 1490 fib6_update_sernum(net, ort);
93c2fb25 1491 spin_unlock_bh(&ort->fib6_table->tb6_lock);
b886d5f2
PA
1492 fib6_force_start_gc(net);
1493 }
35732d01
WW
1494
1495 return err;
1496}
1497
8d1c802b 1498void rt6_flush_exceptions(struct fib6_info *rt)
35732d01
WW
1499{
1500 struct rt6_exception_bucket *bucket;
1501 struct rt6_exception *rt6_ex;
1502 struct hlist_node *tmp;
1503 int i;
1504
1505 spin_lock_bh(&rt6_exception_lock);
1506 /* Prevent rt6_insert_exception() to recreate the bucket list */
1507 rt->exception_bucket_flushed = 1;
1508
1509 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1510 lockdep_is_held(&rt6_exception_lock));
1511 if (!bucket)
1512 goto out;
1513
1514 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1515 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1516 rt6_remove_exception(bucket, rt6_ex);
1517 WARN_ON_ONCE(bucket->depth);
1518 bucket++;
1519 }
1520
1521out:
1522 spin_unlock_bh(&rt6_exception_lock);
1523}
1524
1525/* Find cached rt in the hash table inside passed in rt
1526 * Caller has to hold rcu_read_lock()
1527 */
8d1c802b 1528static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
1529 struct in6_addr *daddr,
1530 struct in6_addr *saddr)
1531{
1532 struct rt6_exception_bucket *bucket;
1533 struct in6_addr *src_key = NULL;
1534 struct rt6_exception *rt6_ex;
1535 struct rt6_info *res = NULL;
1536
1537 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1538
1539#ifdef CONFIG_IPV6_SUBTREES
1540 /* rt6i_src.plen != 0 indicates rt is in subtree
1541 * and exception table is indexed by a hash of
1542 * both rt6i_dst and rt6i_src.
1543 * Otherwise, the exception table is indexed by
1544 * a hash of only rt6i_dst.
1545 */
93c2fb25 1546 if (rt->fib6_src.plen)
35732d01
WW
1547 src_key = saddr;
1548#endif
1549 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1550
1551 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1552 res = rt6_ex->rt6i;
1553
1554 return res;
1555}
1556
1557/* Remove the passed in cached rt from the hash table that contains it */
23fb93a4 1558static int rt6_remove_exception_rt(struct rt6_info *rt)
35732d01 1559{
35732d01
WW
1560 struct rt6_exception_bucket *bucket;
1561 struct in6_addr *src_key = NULL;
1562 struct rt6_exception *rt6_ex;
8a14e46f 1563 struct fib6_info *from;
35732d01
WW
1564 int err;
1565
091311de 1566 from = rcu_dereference(rt->from);
35732d01 1567 if (!from ||
442d713b 1568 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1569 return -EINVAL;
1570
1571 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1572 return -ENOENT;
1573
1574 spin_lock_bh(&rt6_exception_lock);
1575 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1576 lockdep_is_held(&rt6_exception_lock));
1577#ifdef CONFIG_IPV6_SUBTREES
1578 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1579 * and exception table is indexed by a hash of
1580 * both rt6i_dst and rt6i_src.
1581 * Otherwise, the exception table is indexed by
1582 * a hash of only rt6i_dst.
1583 */
93c2fb25 1584 if (from->fib6_src.plen)
35732d01
WW
1585 src_key = &rt->rt6i_src.addr;
1586#endif
1587 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1588 &rt->rt6i_dst.addr,
1589 src_key);
1590 if (rt6_ex) {
1591 rt6_remove_exception(bucket, rt6_ex);
1592 err = 0;
1593 } else {
1594 err = -ENOENT;
1595 }
1596
1597 spin_unlock_bh(&rt6_exception_lock);
1598 return err;
1599}
1600
1601/* Find rt6_ex which contains the passed in rt cache and
1602 * refresh its stamp
1603 */
1604static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1605{
35732d01 1606 struct rt6_exception_bucket *bucket;
8d1c802b 1607 struct fib6_info *from = rt->from;
35732d01
WW
1608 struct in6_addr *src_key = NULL;
1609 struct rt6_exception *rt6_ex;
1610
1611 if (!from ||
442d713b 1612 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1613 return;
1614
1615 rcu_read_lock();
1616 bucket = rcu_dereference(from->rt6i_exception_bucket);
1617
1618#ifdef CONFIG_IPV6_SUBTREES
1619 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1620 * and exception table is indexed by a hash of
1621 * both rt6i_dst and rt6i_src.
1622 * Otherwise, the exception table is indexed by
1623 * a hash of only rt6i_dst.
1624 */
93c2fb25 1625 if (from->fib6_src.plen)
35732d01
WW
1626 src_key = &rt->rt6i_src.addr;
1627#endif
1628 rt6_ex = __rt6_find_exception_rcu(&bucket,
1629 &rt->rt6i_dst.addr,
1630 src_key);
1631 if (rt6_ex)
1632 rt6_ex->stamp = jiffies;
1633
1634 rcu_read_unlock();
1635}
1636
e9fa1495
SB
1637static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1638 struct rt6_info *rt, int mtu)
1639{
1640 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1641 * lowest MTU in the path: always allow updating the route PMTU to
1642 * reflect PMTU decreases.
1643 *
1644 * If the new MTU is higher, and the route PMTU is equal to the local
1645 * MTU, this means the old MTU is the lowest in the path, so allow
1646 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1647 * handle this.
1648 */
1649
1650 if (dst_mtu(&rt->dst) >= mtu)
1651 return true;
1652
1653 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1654 return true;
1655
1656 return false;
1657}
1658
1659static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
8d1c802b 1660 struct fib6_info *rt, int mtu)
f5bbe7ee
WW
1661{
1662 struct rt6_exception_bucket *bucket;
1663 struct rt6_exception *rt6_ex;
1664 int i;
1665
1666 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1667 lockdep_is_held(&rt6_exception_lock));
1668
e9fa1495
SB
1669 if (!bucket)
1670 return;
1671
1672 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1673 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1674 struct rt6_info *entry = rt6_ex->rt6i;
1675
1676 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
d4ead6b3 1677 * route), the metrics of its rt->from have already
e9fa1495
SB
1678 * been updated.
1679 */
d4ead6b3 1680 if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
e9fa1495 1681 rt6_mtu_change_route_allowed(idev, entry, mtu))
d4ead6b3 1682 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
f5bbe7ee 1683 }
e9fa1495 1684 bucket++;
f5bbe7ee
WW
1685 }
1686}
1687
b16cb459
WW
1688#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1689
8d1c802b 1690static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
b16cb459
WW
1691 struct in6_addr *gateway)
1692{
1693 struct rt6_exception_bucket *bucket;
1694 struct rt6_exception *rt6_ex;
1695 struct hlist_node *tmp;
1696 int i;
1697
1698 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1699 return;
1700
1701 spin_lock_bh(&rt6_exception_lock);
1702 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1703 lockdep_is_held(&rt6_exception_lock));
1704
1705 if (bucket) {
1706 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1707 hlist_for_each_entry_safe(rt6_ex, tmp,
1708 &bucket->chain, hlist) {
1709 struct rt6_info *entry = rt6_ex->rt6i;
1710
1711 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1712 RTF_CACHE_GATEWAY &&
1713 ipv6_addr_equal(gateway,
1714 &entry->rt6i_gateway)) {
1715 rt6_remove_exception(bucket, rt6_ex);
1716 }
1717 }
1718 bucket++;
1719 }
1720 }
1721
1722 spin_unlock_bh(&rt6_exception_lock);
1723}
1724
c757faa8
WW
1725static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1726 struct rt6_exception *rt6_ex,
1727 struct fib6_gc_args *gc_args,
1728 unsigned long now)
1729{
1730 struct rt6_info *rt = rt6_ex->rt6i;
1731
1859bac0
PA
1732 /* we are pruning and obsoleting aged-out and non gateway exceptions
1733 * even if others have still references to them, so that on next
1734 * dst_check() such references can be dropped.
1735 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1736 * expired, independently from their aging, as per RFC 8201 section 4
1737 */
31afeb42
WW
1738 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1739 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1740 RT6_TRACE("aging clone %p\n", rt);
1741 rt6_remove_exception(bucket, rt6_ex);
1742 return;
1743 }
1744 } else if (time_after(jiffies, rt->dst.expires)) {
1745 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1746 rt6_remove_exception(bucket, rt6_ex);
1747 return;
31afeb42
WW
1748 }
1749
1750 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1751 struct neighbour *neigh;
1752 __u8 neigh_flags = 0;
1753
1bfa26ff
ED
1754 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1755 if (neigh)
c757faa8 1756 neigh_flags = neigh->flags;
1bfa26ff 1757
c757faa8
WW
1758 if (!(neigh_flags & NTF_ROUTER)) {
1759 RT6_TRACE("purging route %p via non-router but gateway\n",
1760 rt);
1761 rt6_remove_exception(bucket, rt6_ex);
1762 return;
1763 }
1764 }
31afeb42 1765
c757faa8
WW
1766 gc_args->more++;
1767}
1768
8d1c802b 1769void rt6_age_exceptions(struct fib6_info *rt,
c757faa8
WW
1770 struct fib6_gc_args *gc_args,
1771 unsigned long now)
1772{
1773 struct rt6_exception_bucket *bucket;
1774 struct rt6_exception *rt6_ex;
1775 struct hlist_node *tmp;
1776 int i;
1777
1778 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1779 return;
1780
1bfa26ff
ED
1781 rcu_read_lock_bh();
1782 spin_lock(&rt6_exception_lock);
c757faa8
WW
1783 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1784 lockdep_is_held(&rt6_exception_lock));
1785
1786 if (bucket) {
1787 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1788 hlist_for_each_entry_safe(rt6_ex, tmp,
1789 &bucket->chain, hlist) {
1790 rt6_age_examine_exception(bucket, rt6_ex,
1791 gc_args, now);
1792 }
1793 bucket++;
1794 }
1795 }
1bfa26ff
ED
1796 spin_unlock(&rt6_exception_lock);
1797 rcu_read_unlock_bh();
c757faa8
WW
1798}
1799
1d053da9
DA
1800/* must be called with rcu lock held */
1801struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
1802 int oif, struct flowi6 *fl6, int strict)
1da177e4 1803{
367efcb9 1804 struct fib6_node *fn, *saved_fn;
8d1c802b 1805 struct fib6_info *f6i;
1da177e4 1806
6454743b 1807 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1808 saved_fn = fn;
1da177e4 1809
ca254490
DA
1810 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1811 oif = 0;
1812
a3c00e46 1813redo_rt6_select:
23fb93a4 1814 f6i = rt6_select(net, fn, oif, strict);
23fb93a4 1815 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1816 fn = fib6_backtrack(fn, &fl6->saddr);
1817 if (fn)
1818 goto redo_rt6_select;
367efcb9
MKL
1819 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1820 /* also consider unreachable route */
1821 strict &= ~RT6_LOOKUP_F_REACHABLE;
1822 fn = saved_fn;
1823 goto redo_rt6_select;
367efcb9 1824 }
a3c00e46
MKL
1825 }
1826
d4bea421 1827 trace_fib6_table_lookup(net, f6i, table, fl6);
fb9de91e 1828
1d053da9
DA
1829 return f6i;
1830}
1831
1832struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1833 int oif, struct flowi6 *fl6,
1834 const struct sk_buff *skb, int flags)
1835{
1836 struct fib6_info *f6i;
1837 struct rt6_info *rt;
1838 int strict = 0;
1839
1840 strict |= flags & RT6_LOOKUP_F_IFACE;
1841 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1842 if (net->ipv6.devconf_all->forwarding == 0)
1843 strict |= RT6_LOOKUP_F_REACHABLE;
1844
1845 rcu_read_lock();
1846
1847 f6i = fib6_table_lookup(net, table, oif, fl6, strict);
1848 if (f6i->fib6_nsiblings)
1849 f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
1850
23fb93a4 1851 if (f6i == net->ipv6.fib6_null_entry) {
421842ed 1852 rt = net->ipv6.ip6_null_entry;
66f5d6ce 1853 rcu_read_unlock();
d3843fe5 1854 dst_hold(&rt->dst);
d3843fe5 1855 return rt;
23fb93a4
DA
1856 }
1857
1858 /*Search through exception table */
1859 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1860 if (rt) {
d4ead6b3 1861 if (ip6_hold_safe(net, &rt, true))
d3843fe5 1862 dst_use_noref(&rt->dst, jiffies);
d4ead6b3 1863
66f5d6ce 1864 rcu_read_unlock();
d52d3997 1865 return rt;
3da59bd9 1866 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
93c2fb25 1867 !(f6i->fib6_flags & RTF_GATEWAY))) {
3da59bd9
MKL
1868 /* Create a RTF_CACHE clone which will not be
1869 * owned by the fib6 tree. It is for the special case where
1870 * the daddr in the skb during the neighbor look-up is different
1871 * from the fl6->daddr used to look-up route here.
1872 */
3da59bd9
MKL
1873 struct rt6_info *uncached_rt;
1874
23fb93a4 1875 uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
d52d3997 1876
4d85cd0c 1877 rcu_read_unlock();
c71099ac 1878
1cfb71ee
WW
1879 if (uncached_rt) {
1880 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1881 * No need for another dst_hold()
1882 */
8d0b94af 1883 rt6_uncached_list_add(uncached_rt);
81eb8447 1884 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1885 } else {
3da59bd9 1886 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1887 dst_hold(&uncached_rt->dst);
1888 }
b811580d 1889
3da59bd9 1890 return uncached_rt;
d52d3997
MKL
1891 } else {
1892 /* Get a percpu copy */
1893
1894 struct rt6_info *pcpu_rt;
1895
951f788a 1896 local_bh_disable();
23fb93a4 1897 pcpu_rt = rt6_get_pcpu_route(f6i);
d52d3997 1898
93531c67
DA
1899 if (!pcpu_rt)
1900 pcpu_rt = rt6_make_pcpu_route(net, f6i);
1901
951f788a
ED
1902 local_bh_enable();
1903 rcu_read_unlock();
d4bea421 1904
d52d3997
MKL
1905 return pcpu_rt;
1906 }
1da177e4 1907}
9ff74384 1908EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1909
b75cc8f9
DA
1910static struct rt6_info *ip6_pol_route_input(struct net *net,
1911 struct fib6_table *table,
1912 struct flowi6 *fl6,
1913 const struct sk_buff *skb,
1914 int flags)
4acad72d 1915{
b75cc8f9 1916 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
4acad72d
PE
1917}
1918
d409b847
MB
1919struct dst_entry *ip6_route_input_lookup(struct net *net,
1920 struct net_device *dev,
b75cc8f9
DA
1921 struct flowi6 *fl6,
1922 const struct sk_buff *skb,
1923 int flags)
72331bc0
SL
1924{
1925 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1926 flags |= RT6_LOOKUP_F_IFACE;
1927
b75cc8f9 1928 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
72331bc0 1929}
d409b847 1930EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1931
23aebdac 1932static void ip6_multipath_l3_keys(const struct sk_buff *skb,
5e5d6fed
RP
1933 struct flow_keys *keys,
1934 struct flow_keys *flkeys)
23aebdac
JS
1935{
1936 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1937 const struct ipv6hdr *key_iph = outer_iph;
5e5d6fed 1938 struct flow_keys *_flkeys = flkeys;
23aebdac
JS
1939 const struct ipv6hdr *inner_iph;
1940 const struct icmp6hdr *icmph;
1941 struct ipv6hdr _inner_iph;
cea67a2d 1942 struct icmp6hdr _icmph;
23aebdac
JS
1943
1944 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1945 goto out;
1946
cea67a2d
ED
1947 icmph = skb_header_pointer(skb, skb_transport_offset(skb),
1948 sizeof(_icmph), &_icmph);
1949 if (!icmph)
1950 goto out;
1951
23aebdac
JS
1952 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1953 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1954 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1955 icmph->icmp6_type != ICMPV6_PARAMPROB)
1956 goto out;
1957
1958 inner_iph = skb_header_pointer(skb,
1959 skb_transport_offset(skb) + sizeof(*icmph),
1960 sizeof(_inner_iph), &_inner_iph);
1961 if (!inner_iph)
1962 goto out;
1963
1964 key_iph = inner_iph;
5e5d6fed 1965 _flkeys = NULL;
23aebdac 1966out:
5e5d6fed
RP
1967 if (_flkeys) {
1968 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
1969 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
1970 keys->tags.flow_label = _flkeys->tags.flow_label;
1971 keys->basic.ip_proto = _flkeys->basic.ip_proto;
1972 } else {
1973 keys->addrs.v6addrs.src = key_iph->saddr;
1974 keys->addrs.v6addrs.dst = key_iph->daddr;
fa1be7e0 1975 keys->tags.flow_label = ip6_flowlabel(key_iph);
5e5d6fed
RP
1976 keys->basic.ip_proto = key_iph->nexthdr;
1977 }
23aebdac
JS
1978}
1979
1980/* if skb is set it will be used and fl6 can be NULL */
b4bac172
DA
1981u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1982 const struct sk_buff *skb, struct flow_keys *flkeys)
23aebdac
JS
1983{
1984 struct flow_keys hash_keys;
9a2a537a 1985 u32 mhash;
23aebdac 1986
bbfa047a 1987 switch (ip6_multipath_hash_policy(net)) {
b4bac172
DA
1988 case 0:
1989 memset(&hash_keys, 0, sizeof(hash_keys));
1990 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1991 if (skb) {
1992 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
1993 } else {
1994 hash_keys.addrs.v6addrs.src = fl6->saddr;
1995 hash_keys.addrs.v6addrs.dst = fl6->daddr;
fa1be7e0 1996 hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
b4bac172
DA
1997 hash_keys.basic.ip_proto = fl6->flowi6_proto;
1998 }
1999 break;
2000 case 1:
2001 if (skb) {
2002 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2003 struct flow_keys keys;
2004
2005 /* short-circuit if we already have L4 hash present */
2006 if (skb->l4_hash)
2007 return skb_get_hash_raw(skb) >> 1;
2008
2009 memset(&hash_keys, 0, sizeof(hash_keys));
2010
2011 if (!flkeys) {
2012 skb_flow_dissect_flow_keys(skb, &keys, flag);
2013 flkeys = &keys;
2014 }
2015 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2016 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2017 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2018 hash_keys.ports.src = flkeys->ports.src;
2019 hash_keys.ports.dst = flkeys->ports.dst;
2020 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2021 } else {
2022 memset(&hash_keys, 0, sizeof(hash_keys));
2023 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2024 hash_keys.addrs.v6addrs.src = fl6->saddr;
2025 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2026 hash_keys.ports.src = fl6->fl6_sport;
2027 hash_keys.ports.dst = fl6->fl6_dport;
2028 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2029 }
2030 break;
23aebdac 2031 }
9a2a537a 2032 mhash = flow_hash_from_keys(&hash_keys);
23aebdac 2033
9a2a537a 2034 return mhash >> 1;
23aebdac
JS
2035}
2036
c71099ac
TG
2037void ip6_route_input(struct sk_buff *skb)
2038{
b71d1d42 2039 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 2040 struct net *net = dev_net(skb->dev);
adaa70bb 2041 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 2042 struct ip_tunnel_info *tun_info;
4c9483b2 2043 struct flowi6 fl6 = {
e0d56fdd 2044 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
2045 .daddr = iph->daddr,
2046 .saddr = iph->saddr,
6502ca52 2047 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
2048 .flowi6_mark = skb->mark,
2049 .flowi6_proto = iph->nexthdr,
c71099ac 2050 };
5e5d6fed 2051 struct flow_keys *flkeys = NULL, _flkeys;
adaa70bb 2052
904af04d 2053 tun_info = skb_tunnel_info(skb);
46fa062a 2054 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 2055 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
5e5d6fed
RP
2056
2057 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2058 flkeys = &_flkeys;
2059
23aebdac 2060 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
b4bac172 2061 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
06e9d040 2062 skb_dst_drop(skb);
b75cc8f9
DA
2063 skb_dst_set(skb,
2064 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
c71099ac
TG
2065}
2066
b75cc8f9
DA
2067static struct rt6_info *ip6_pol_route_output(struct net *net,
2068 struct fib6_table *table,
2069 struct flowi6 *fl6,
2070 const struct sk_buff *skb,
2071 int flags)
1da177e4 2072{
b75cc8f9 2073 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
c71099ac
TG
2074}
2075
6f21c96a
PA
2076struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2077 struct flowi6 *fl6, int flags)
c71099ac 2078{
d46a9d67 2079 bool any_src;
c71099ac 2080
3ede0bbc
RS
2081 if (ipv6_addr_type(&fl6->daddr) &
2082 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
4c1feac5
DA
2083 struct dst_entry *dst;
2084
2085 dst = l3mdev_link_scope_lookup(net, fl6);
2086 if (dst)
2087 return dst;
2088 }
ca254490 2089
1fb9489b 2090 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 2091
d46a9d67 2092 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 2093 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 2094 (fl6->flowi6_oif && any_src))
77d16f45 2095 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 2096
d46a9d67 2097 if (!any_src)
adaa70bb 2098 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
2099 else if (sk)
2100 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 2101
b75cc8f9 2102 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
1da177e4 2103}
6f21c96a 2104EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 2105
2774c131 2106struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2107{
5c1e6aa3 2108 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 2109 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
2110 struct dst_entry *new = NULL;
2111
1dbe3252 2112 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 2113 DST_OBSOLETE_DEAD, 0);
14e50e57 2114 if (rt) {
0a1f5962 2115 rt6_info_init(rt);
81eb8447 2116 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 2117
0a1f5962 2118 new = &rt->dst;
14e50e57 2119 new->__use = 1;
352e512c 2120 new->input = dst_discard;
ede2059d 2121 new->output = dst_discard_out;
14e50e57 2122
0a1f5962 2123 dst_copy_metrics(new, &ort->dst);
14e50e57 2124
1dbe3252 2125 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 2126 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 2127 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
2128
2129 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2130#ifdef CONFIG_IPV6_SUBTREES
2131 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2132#endif
14e50e57
DM
2133 }
2134
69ead7af
DM
2135 dst_release(dst_orig);
2136 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 2137}
14e50e57 2138
1da177e4
LT
2139/*
2140 * Destination cache support functions
2141 */
2142
8d1c802b 2143static bool fib6_check(struct fib6_info *f6i, u32 cookie)
4b32b5ad 2144{
93531c67
DA
2145 u32 rt_cookie = 0;
2146
8ae86971 2147 if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
93531c67
DA
2148 return false;
2149
2150 if (fib6_check_expired(f6i))
2151 return false;
2152
2153 return true;
4b32b5ad
MKL
2154}
2155
a68886a6
DA
2156static struct dst_entry *rt6_check(struct rt6_info *rt,
2157 struct fib6_info *from,
2158 u32 cookie)
3da59bd9 2159{
36143645 2160 u32 rt_cookie = 0;
c5cff856 2161
a68886a6 2162 if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
93531c67 2163 rt_cookie != cookie)
3da59bd9
MKL
2164 return NULL;
2165
2166 if (rt6_check_expired(rt))
2167 return NULL;
2168
2169 return &rt->dst;
2170}
2171
a68886a6
DA
2172static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2173 struct fib6_info *from,
2174 u32 cookie)
3da59bd9 2175{
5973fb1e
MKL
2176 if (!__rt6_check_expired(rt) &&
2177 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
a68886a6 2178 fib6_check(from, cookie))
3da59bd9
MKL
2179 return &rt->dst;
2180 else
2181 return NULL;
2182}
2183
1da177e4
LT
2184static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2185{
a87b7dc9 2186 struct dst_entry *dst_ret;
a68886a6 2187 struct fib6_info *from;
1da177e4
LT
2188 struct rt6_info *rt;
2189
a87b7dc9
DA
2190 rt = container_of(dst, struct rt6_info, dst);
2191
2192 rcu_read_lock();
1da177e4 2193
6f3118b5
ND
2194 /* All IPV6 dsts are created with ->obsolete set to the value
2195 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2196 * into this function always.
2197 */
e3bc10bd 2198
a68886a6 2199 from = rcu_dereference(rt->from);
4b32b5ad 2200
a68886a6
DA
2201 if (from && (rt->rt6i_flags & RTF_PCPU ||
2202 unlikely(!list_empty(&rt->rt6i_uncached))))
2203 dst_ret = rt6_dst_from_check(rt, from, cookie);
3da59bd9 2204 else
a68886a6 2205 dst_ret = rt6_check(rt, from, cookie);
a87b7dc9
DA
2206
2207 rcu_read_unlock();
2208
2209 return dst_ret;
1da177e4
LT
2210}
2211
2212static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2213{
2214 struct rt6_info *rt = (struct rt6_info *) dst;
2215
2216 if (rt) {
54c1a859 2217 if (rt->rt6i_flags & RTF_CACHE) {
c3c14da0 2218 rcu_read_lock();
54c1a859 2219 if (rt6_check_expired(rt)) {
93531c67 2220 rt6_remove_exception_rt(rt);
54c1a859
YH
2221 dst = NULL;
2222 }
c3c14da0 2223 rcu_read_unlock();
54c1a859 2224 } else {
1da177e4 2225 dst_release(dst);
54c1a859
YH
2226 dst = NULL;
2227 }
1da177e4 2228 }
54c1a859 2229 return dst;
1da177e4
LT
2230}
2231
2232static void ip6_link_failure(struct sk_buff *skb)
2233{
2234 struct rt6_info *rt;
2235
3ffe533c 2236 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2237
adf30907 2238 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2239 if (rt) {
8a14e46f 2240 rcu_read_lock();
1eb4f758 2241 if (rt->rt6i_flags & RTF_CACHE) {
ad65a2f0 2242 if (dst_hold_safe(&rt->dst))
93531c67 2243 rt6_remove_exception_rt(rt);
c5cff856 2244 } else {
a68886a6 2245 struct fib6_info *from;
c5cff856
WW
2246 struct fib6_node *fn;
2247
a68886a6
DA
2248 from = rcu_dereference(rt->from);
2249 if (from) {
2250 fn = rcu_dereference(from->fib6_node);
2251 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2252 fn->fn_sernum = -1;
2253 }
1eb4f758 2254 }
8a14e46f 2255 rcu_read_unlock();
1da177e4
LT
2256 }
2257}
2258
6a3e030f
DA
2259static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2260{
a68886a6
DA
2261 if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2262 struct fib6_info *from;
2263
2264 rcu_read_lock();
2265 from = rcu_dereference(rt0->from);
2266 if (from)
2267 rt0->dst.expires = from->expires;
2268 rcu_read_unlock();
2269 }
6a3e030f
DA
2270
2271 dst_set_expires(&rt0->dst, timeout);
2272 rt0->rt6i_flags |= RTF_EXPIRES;
2273}
2274
45e4fd26
MKL
2275static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2276{
2277 struct net *net = dev_net(rt->dst.dev);
2278
d4ead6b3 2279 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
45e4fd26 2280 rt->rt6i_flags |= RTF_MODIFIED;
45e4fd26
MKL
2281 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2282}
2283
0d3f6d29
MKL
2284static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2285{
a68886a6
DA
2286 bool from_set;
2287
2288 rcu_read_lock();
2289 from_set = !!rcu_dereference(rt->from);
2290 rcu_read_unlock();
2291
0d3f6d29 2292 return !(rt->rt6i_flags & RTF_CACHE) &&
a68886a6 2293 (rt->rt6i_flags & RTF_PCPU || from_set);
0d3f6d29
MKL
2294}
2295
45e4fd26
MKL
2296static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2297 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2298{
0dec879f 2299 const struct in6_addr *daddr, *saddr;
67ba4152 2300 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2301
19bda36c
XL
2302 if (dst_metric_locked(dst, RTAX_MTU))
2303 return;
2304
0dec879f
JA
2305 if (iph) {
2306 daddr = &iph->daddr;
2307 saddr = &iph->saddr;
2308 } else if (sk) {
2309 daddr = &sk->sk_v6_daddr;
2310 saddr = &inet6_sk(sk)->saddr;
2311 } else {
2312 daddr = NULL;
2313 saddr = NULL;
2314 }
2315 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2316 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2317 if (mtu >= dst_mtu(dst))
2318 return;
9d289715 2319
0d3f6d29 2320 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2321 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2322 /* update rt6_ex->stamp for cache */
2323 if (rt6->rt6i_flags & RTF_CACHE)
2324 rt6_update_exception_stamp_rt(rt6);
0dec879f 2325 } else if (daddr) {
a68886a6 2326 struct fib6_info *from;
45e4fd26
MKL
2327 struct rt6_info *nrt6;
2328
4d85cd0c 2329 rcu_read_lock();
a68886a6
DA
2330 from = rcu_dereference(rt6->from);
2331 nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
45e4fd26
MKL
2332 if (nrt6) {
2333 rt6_do_update_pmtu(nrt6, mtu);
a68886a6 2334 if (rt6_insert_exception(nrt6, from))
2b760fcf 2335 dst_release_immediate(&nrt6->dst);
45e4fd26 2336 }
a68886a6 2337 rcu_read_unlock();
1da177e4
LT
2338 }
2339}
2340
45e4fd26
MKL
2341static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2342 struct sk_buff *skb, u32 mtu)
2343{
2344 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2345}
2346
42ae66c8 2347void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2348 int oif, u32 mark, kuid_t uid)
81aded24
DM
2349{
2350 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2351 struct dst_entry *dst;
2352 struct flowi6 fl6;
2353
2354 memset(&fl6, 0, sizeof(fl6));
2355 fl6.flowi6_oif = oif;
1b3c61dc 2356 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
2357 fl6.daddr = iph->daddr;
2358 fl6.saddr = iph->saddr;
6502ca52 2359 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2360 fl6.flowi6_uid = uid;
81aded24
DM
2361
2362 dst = ip6_route_output(net, NULL, &fl6);
2363 if (!dst->error)
45e4fd26 2364 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2365 dst_release(dst);
2366}
2367EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2368
2369void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2370{
33c162a9
MKL
2371 struct dst_entry *dst;
2372
81aded24 2373 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 2374 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2375
2376 dst = __sk_dst_get(sk);
2377 if (!dst || !dst->obsolete ||
2378 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2379 return;
2380
2381 bh_lock_sock(sk);
2382 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2383 ip6_datagram_dst_update(sk, false);
2384 bh_unlock_sock(sk);
81aded24
DM
2385}
2386EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2387
7d6850f7
AK
2388void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2389 const struct flowi6 *fl6)
2390{
2391#ifdef CONFIG_IPV6_SUBTREES
2392 struct ipv6_pinfo *np = inet6_sk(sk);
2393#endif
2394
2395 ip6_dst_store(sk, dst,
2396 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2397 &sk->sk_v6_daddr : NULL,
2398#ifdef CONFIG_IPV6_SUBTREES
2399 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2400 &np->saddr :
2401#endif
2402 NULL);
2403}
2404
b55b76b2
DJ
2405/* Handle redirects */
2406struct ip6rd_flowi {
2407 struct flowi6 fl6;
2408 struct in6_addr gateway;
2409};
2410
2411static struct rt6_info *__ip6_route_redirect(struct net *net,
2412 struct fib6_table *table,
2413 struct flowi6 *fl6,
b75cc8f9 2414 const struct sk_buff *skb,
b55b76b2
DJ
2415 int flags)
2416{
2417 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
23fb93a4 2418 struct rt6_info *ret = NULL, *rt_cache;
8d1c802b 2419 struct fib6_info *rt;
b55b76b2
DJ
2420 struct fib6_node *fn;
2421
2422 /* Get the "current" route for this destination and
67c408cf 2423 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2424 *
2425 * RFC 4861 specifies that redirects should only be
2426 * accepted if they come from the nexthop to the target.
2427 * Due to the way the routes are chosen, this notion
2428 * is a bit fuzzy and one might need to check all possible
2429 * routes.
2430 */
2431
66f5d6ce 2432 rcu_read_lock();
6454743b 2433 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
b55b76b2 2434restart:
66f5d6ce 2435 for_each_fib6_node_rt_rcu(fn) {
5e670d84 2436 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c 2437 continue;
14895687 2438 if (fib6_check_expired(rt))
b55b76b2 2439 continue;
93c2fb25 2440 if (rt->fib6_flags & RTF_REJECT)
b55b76b2 2441 break;
93c2fb25 2442 if (!(rt->fib6_flags & RTF_GATEWAY))
b55b76b2 2443 continue;
5e670d84 2444 if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
b55b76b2 2445 continue;
2b760fcf
WW
2446 /* rt_cache's gateway might be different from its 'parent'
2447 * in the case of an ip redirect.
2448 * So we keep searching in the exception table if the gateway
2449 * is different.
2450 */
5e670d84 2451 if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
2b760fcf
WW
2452 rt_cache = rt6_find_cached_rt(rt,
2453 &fl6->daddr,
2454 &fl6->saddr);
2455 if (rt_cache &&
2456 ipv6_addr_equal(&rdfl->gateway,
2457 &rt_cache->rt6i_gateway)) {
23fb93a4 2458 ret = rt_cache;
2b760fcf
WW
2459 break;
2460 }
b55b76b2 2461 continue;
2b760fcf 2462 }
b55b76b2
DJ
2463 break;
2464 }
2465
2466 if (!rt)
421842ed 2467 rt = net->ipv6.fib6_null_entry;
93c2fb25 2468 else if (rt->fib6_flags & RTF_REJECT) {
23fb93a4 2469 ret = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2470 goto out;
2471 }
2472
421842ed 2473 if (rt == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
2474 fn = fib6_backtrack(fn, &fl6->saddr);
2475 if (fn)
2476 goto restart;
b55b76b2 2477 }
a3c00e46 2478
b0a1ba59 2479out:
23fb93a4 2480 if (ret)
e873e4b9 2481 ip6_hold_safe(net, &ret, true);
23fb93a4
DA
2482 else
2483 ret = ip6_create_rt_rcu(rt);
b55b76b2 2484
66f5d6ce 2485 rcu_read_unlock();
b55b76b2 2486
b65f164d 2487 trace_fib6_table_lookup(net, rt, table, fl6);
23fb93a4 2488 return ret;
b55b76b2
DJ
2489};
2490
2491static struct dst_entry *ip6_route_redirect(struct net *net,
b75cc8f9
DA
2492 const struct flowi6 *fl6,
2493 const struct sk_buff *skb,
2494 const struct in6_addr *gateway)
b55b76b2
DJ
2495{
2496 int flags = RT6_LOOKUP_F_HAS_SADDR;
2497 struct ip6rd_flowi rdfl;
2498
2499 rdfl.fl6 = *fl6;
2500 rdfl.gateway = *gateway;
2501
b75cc8f9 2502 return fib6_rule_lookup(net, &rdfl.fl6, skb,
b55b76b2
DJ
2503 flags, __ip6_route_redirect);
2504}
2505
e2d118a1
LC
2506void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2507 kuid_t uid)
3a5ad2ee
DM
2508{
2509 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2510 struct dst_entry *dst;
2511 struct flowi6 fl6;
2512
2513 memset(&fl6, 0, sizeof(fl6));
e374c618 2514 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
2515 fl6.flowi6_oif = oif;
2516 fl6.flowi6_mark = mark;
3a5ad2ee
DM
2517 fl6.daddr = iph->daddr;
2518 fl6.saddr = iph->saddr;
6502ca52 2519 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2520 fl6.flowi6_uid = uid;
3a5ad2ee 2521
b75cc8f9 2522 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
b55b76b2 2523 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2524 dst_release(dst);
2525}
2526EXPORT_SYMBOL_GPL(ip6_redirect);
2527
c92a59ec
DJ
2528void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2529 u32 mark)
2530{
2531 const struct ipv6hdr *iph = ipv6_hdr(skb);
2532 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2533 struct dst_entry *dst;
2534 struct flowi6 fl6;
2535
2536 memset(&fl6, 0, sizeof(fl6));
e374c618 2537 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
2538 fl6.flowi6_oif = oif;
2539 fl6.flowi6_mark = mark;
c92a59ec
DJ
2540 fl6.daddr = msg->dest;
2541 fl6.saddr = iph->daddr;
e2d118a1 2542 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 2543
b75cc8f9 2544 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
b55b76b2 2545 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2546 dst_release(dst);
2547}
2548
3a5ad2ee
DM
2549void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2550{
e2d118a1
LC
2551 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2552 sk->sk_uid);
3a5ad2ee
DM
2553}
2554EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2555
0dbaee3b 2556static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2557{
0dbaee3b
DM
2558 struct net_device *dev = dst->dev;
2559 unsigned int mtu = dst_mtu(dst);
2560 struct net *net = dev_net(dev);
2561
1da177e4
LT
2562 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2563
5578689a
DL
2564 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2565 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2566
2567 /*
1ab1457c
YH
2568 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2569 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2570 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2571 * rely only on pmtu discovery"
2572 */
2573 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2574 mtu = IPV6_MAXPLEN;
2575 return mtu;
2576}
2577
ebb762f2 2578static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2579{
d33e4553 2580 struct inet6_dev *idev;
d4ead6b3 2581 unsigned int mtu;
4b32b5ad
MKL
2582
2583 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2584 if (mtu)
30f78d8e 2585 goto out;
618f9bc7
SK
2586
2587 mtu = IPV6_MIN_MTU;
d33e4553
DM
2588
2589 rcu_read_lock();
2590 idev = __in6_dev_get(dst->dev);
2591 if (idev)
2592 mtu = idev->cnf.mtu6;
2593 rcu_read_unlock();
2594
30f78d8e 2595out:
14972cbd
RP
2596 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2597
2598 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2599}
2600
901731b8
DA
2601/* MTU selection:
2602 * 1. mtu on route is locked - use it
2603 * 2. mtu from nexthop exception
2604 * 3. mtu from egress device
2605 *
2606 * based on ip6_dst_mtu_forward and exception logic of
2607 * rt6_find_cached_rt; called with rcu_read_lock
2608 */
2609u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
2610 struct in6_addr *saddr)
2611{
2612 struct rt6_exception_bucket *bucket;
2613 struct rt6_exception *rt6_ex;
2614 struct in6_addr *src_key;
2615 struct inet6_dev *idev;
2616 u32 mtu = 0;
2617
2618 if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
2619 mtu = f6i->fib6_pmtu;
2620 if (mtu)
2621 goto out;
2622 }
2623
2624 src_key = NULL;
2625#ifdef CONFIG_IPV6_SUBTREES
2626 if (f6i->fib6_src.plen)
2627 src_key = saddr;
2628#endif
2629
2630 bucket = rcu_dereference(f6i->rt6i_exception_bucket);
2631 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
2632 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
2633 mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
2634
2635 if (likely(!mtu)) {
2636 struct net_device *dev = fib6_info_nh_dev(f6i);
2637
2638 mtu = IPV6_MIN_MTU;
2639 idev = __in6_dev_get(dev);
2640 if (idev && idev->cnf.mtu6 > mtu)
2641 mtu = idev->cnf.mtu6;
2642 }
2643
2644 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2645out:
2646 return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
2647}
2648
3b00944c 2649struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2650 struct flowi6 *fl6)
1da177e4 2651{
87a11578 2652 struct dst_entry *dst;
1da177e4
LT
2653 struct rt6_info *rt;
2654 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2655 struct net *net = dev_net(dev);
1da177e4 2656
38308473 2657 if (unlikely(!idev))
122bdf67 2658 return ERR_PTR(-ENODEV);
1da177e4 2659
ad706862 2660 rt = ip6_dst_alloc(net, dev, 0);
38308473 2661 if (unlikely(!rt)) {
1da177e4 2662 in6_dev_put(idev);
87a11578 2663 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2664 goto out;
2665 }
2666
8e2ec639 2667 rt->dst.flags |= DST_HOST;
588753f1 2668 rt->dst.input = ip6_input;
8e2ec639 2669 rt->dst.output = ip6_output;
550bab42 2670 rt->rt6i_gateway = fl6->daddr;
87a11578 2671 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2672 rt->rt6i_dst.plen = 128;
2673 rt->rt6i_idev = idev;
14edd87d 2674 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2675
4c981e28 2676 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2677 * do proper release of the net_device
2678 */
2679 rt6_uncached_list_add(rt);
81eb8447 2680 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2681
87a11578
DM
2682 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2683
1da177e4 2684out:
87a11578 2685 return dst;
1da177e4
LT
2686}
2687
569d3645 2688static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2689{
86393e52 2690 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2691 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2692 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2693 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2694 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2695 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2696 int entries;
7019b78e 2697
fc66f95c 2698 entries = dst_entries_get_fast(ops);
49a18d86 2699 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2700 entries <= rt_max_size)
1da177e4
LT
2701 goto out;
2702
6891a346 2703 net->ipv6.ip6_rt_gc_expire++;
14956643 2704 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2705 entries = dst_entries_get_slow(ops);
2706 if (entries < ops->gc_thresh)
7019b78e 2707 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2708out:
7019b78e 2709 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2710 return entries > rt_max_size;
1da177e4
LT
2711}
2712
8d1c802b 2713static int ip6_convert_metrics(struct net *net, struct fib6_info *rt,
d4ead6b3 2714 struct fib6_config *cfg)
e715b6d3 2715{
263243d6 2716 struct dst_metrics *p;
e715b6d3 2717
63159f29 2718 if (!cfg->fc_mx)
e715b6d3
FW
2719 return 0;
2720
263243d6
ED
2721 p = kzalloc(sizeof(*rt->fib6_metrics), GFP_KERNEL);
2722 if (unlikely(!p))
e715b6d3
FW
2723 return -ENOMEM;
2724
263243d6
ED
2725 refcount_set(&p->refcnt, 1);
2726 rt->fib6_metrics = p;
e715b6d3 2727
263243d6 2728 return ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, p->metrics);
e715b6d3 2729}
1da177e4 2730
8c14586f
DA
2731static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2732 struct fib6_config *cfg,
f4797b33
DA
2733 const struct in6_addr *gw_addr,
2734 u32 tbid, int flags)
8c14586f
DA
2735{
2736 struct flowi6 fl6 = {
2737 .flowi6_oif = cfg->fc_ifindex,
2738 .daddr = *gw_addr,
2739 .saddr = cfg->fc_prefsrc,
2740 };
2741 struct fib6_table *table;
2742 struct rt6_info *rt;
8c14586f 2743
f4797b33 2744 table = fib6_get_table(net, tbid);
8c14586f
DA
2745 if (!table)
2746 return NULL;
2747
2748 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2749 flags |= RT6_LOOKUP_F_HAS_SADDR;
2750
f4797b33 2751 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
b75cc8f9 2752 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
8c14586f
DA
2753
2754 /* if table lookup failed, fall back to full lookup */
2755 if (rt == net->ipv6.ip6_null_entry) {
2756 ip6_rt_put(rt);
2757 rt = NULL;
2758 }
2759
2760 return rt;
2761}
2762
fc1e64e1
DA
2763static int ip6_route_check_nh_onlink(struct net *net,
2764 struct fib6_config *cfg,
9fbb704c 2765 const struct net_device *dev,
fc1e64e1
DA
2766 struct netlink_ext_ack *extack)
2767{
44750f84 2768 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
fc1e64e1
DA
2769 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2770 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2771 struct rt6_info *grt;
2772 int err;
2773
2774 err = 0;
2775 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2776 if (grt) {
58e354c0
DA
2777 if (!grt->dst.error &&
2778 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
44750f84
DA
2779 NL_SET_ERR_MSG(extack,
2780 "Nexthop has invalid gateway or device mismatch");
fc1e64e1
DA
2781 err = -EINVAL;
2782 }
2783
2784 ip6_rt_put(grt);
2785 }
2786
2787 return err;
2788}
2789
1edce99f
DA
2790static int ip6_route_check_nh(struct net *net,
2791 struct fib6_config *cfg,
2792 struct net_device **_dev,
2793 struct inet6_dev **idev)
2794{
2795 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2796 struct net_device *dev = _dev ? *_dev : NULL;
2797 struct rt6_info *grt = NULL;
2798 int err = -EHOSTUNREACH;
2799
2800 if (cfg->fc_table) {
f4797b33
DA
2801 int flags = RT6_LOOKUP_F_IFACE;
2802
2803 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2804 cfg->fc_table, flags);
1edce99f
DA
2805 if (grt) {
2806 if (grt->rt6i_flags & RTF_GATEWAY ||
2807 (dev && dev != grt->dst.dev)) {
2808 ip6_rt_put(grt);
2809 grt = NULL;
2810 }
2811 }
2812 }
2813
2814 if (!grt)
b75cc8f9 2815 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
1edce99f
DA
2816
2817 if (!grt)
2818 goto out;
2819
2820 if (dev) {
2821 if (dev != grt->dst.dev) {
2822 ip6_rt_put(grt);
2823 goto out;
2824 }
2825 } else {
2826 *_dev = dev = grt->dst.dev;
2827 *idev = grt->rt6i_idev;
2828 dev_hold(dev);
2829 in6_dev_hold(grt->rt6i_idev);
2830 }
2831
2832 if (!(grt->rt6i_flags & RTF_GATEWAY))
2833 err = 0;
2834
2835 ip6_rt_put(grt);
2836
2837out:
2838 return err;
2839}
2840
9fbb704c
DA
2841static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2842 struct net_device **_dev, struct inet6_dev **idev,
2843 struct netlink_ext_ack *extack)
2844{
2845 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2846 int gwa_type = ipv6_addr_type(gw_addr);
232378e8 2847 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
9fbb704c 2848 const struct net_device *dev = *_dev;
232378e8 2849 bool need_addr_check = !dev;
9fbb704c
DA
2850 int err = -EINVAL;
2851
2852 /* if gw_addr is local we will fail to detect this in case
2853 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2854 * will return already-added prefix route via interface that
2855 * prefix route was assigned to, which might be non-loopback.
2856 */
232378e8
DA
2857 if (dev &&
2858 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2859 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
9fbb704c
DA
2860 goto out;
2861 }
2862
2863 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2864 /* IPv6 strictly inhibits using not link-local
2865 * addresses as nexthop address.
2866 * Otherwise, router will not able to send redirects.
2867 * It is very good, but in some (rare!) circumstances
2868 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2869 * some exceptions. --ANK
2870 * We allow IPv4-mapped nexthops to support RFC4798-type
2871 * addressing
2872 */
2873 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2874 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2875 goto out;
2876 }
2877
2878 if (cfg->fc_flags & RTNH_F_ONLINK)
2879 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2880 else
2881 err = ip6_route_check_nh(net, cfg, _dev, idev);
2882
2883 if (err)
2884 goto out;
2885 }
2886
2887 /* reload in case device was changed */
2888 dev = *_dev;
2889
2890 err = -EINVAL;
2891 if (!dev) {
2892 NL_SET_ERR_MSG(extack, "Egress device not specified");
2893 goto out;
2894 } else if (dev->flags & IFF_LOOPBACK) {
2895 NL_SET_ERR_MSG(extack,
2896 "Egress device can not be loopback device for this route");
2897 goto out;
2898 }
232378e8
DA
2899
2900 /* if we did not check gw_addr above, do so now that the
2901 * egress device has been resolved.
2902 */
2903 if (need_addr_check &&
2904 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2905 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2906 goto out;
2907 }
2908
9fbb704c
DA
2909 err = 0;
2910out:
2911 return err;
2912}
2913
8d1c802b 2914static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
acb54e3c 2915 gfp_t gfp_flags,
333c4301 2916 struct netlink_ext_ack *extack)
1da177e4 2917{
5578689a 2918 struct net *net = cfg->fc_nlinfo.nl_net;
8d1c802b 2919 struct fib6_info *rt = NULL;
1da177e4
LT
2920 struct net_device *dev = NULL;
2921 struct inet6_dev *idev = NULL;
c71099ac 2922 struct fib6_table *table;
1da177e4 2923 int addr_type;
8c5b83f0 2924 int err = -EINVAL;
1da177e4 2925
557c44be 2926 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
2927 if (cfg->fc_flags & RTF_PCPU) {
2928 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 2929 goto out;
d5d531cb 2930 }
557c44be 2931
2ea2352e
WW
2932 /* RTF_CACHE is an internal flag; can not be set by userspace */
2933 if (cfg->fc_flags & RTF_CACHE) {
2934 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2935 goto out;
2936 }
2937
e8478e80
DA
2938 if (cfg->fc_type > RTN_MAX) {
2939 NL_SET_ERR_MSG(extack, "Invalid route type");
2940 goto out;
2941 }
2942
d5d531cb
DA
2943 if (cfg->fc_dst_len > 128) {
2944 NL_SET_ERR_MSG(extack, "Invalid prefix length");
2945 goto out;
2946 }
2947 if (cfg->fc_src_len > 128) {
2948 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 2949 goto out;
d5d531cb 2950 }
1da177e4 2951#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
2952 if (cfg->fc_src_len) {
2953 NL_SET_ERR_MSG(extack,
2954 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 2955 goto out;
d5d531cb 2956 }
1da177e4 2957#endif
86872cb5 2958 if (cfg->fc_ifindex) {
1da177e4 2959 err = -ENODEV;
5578689a 2960 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
2961 if (!dev)
2962 goto out;
2963 idev = in6_dev_get(dev);
2964 if (!idev)
2965 goto out;
2966 }
2967
86872cb5
TG
2968 if (cfg->fc_metric == 0)
2969 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 2970
fc1e64e1
DA
2971 if (cfg->fc_flags & RTNH_F_ONLINK) {
2972 if (!dev) {
2973 NL_SET_ERR_MSG(extack,
2974 "Nexthop device required for onlink");
2975 err = -ENODEV;
2976 goto out;
2977 }
2978
2979 if (!(dev->flags & IFF_UP)) {
2980 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2981 err = -ENETDOWN;
2982 goto out;
2983 }
2984 }
2985
d71314b4 2986 err = -ENOBUFS;
38308473
DM
2987 if (cfg->fc_nlinfo.nlh &&
2988 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 2989 table = fib6_get_table(net, cfg->fc_table);
38308473 2990 if (!table) {
f3213831 2991 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
2992 table = fib6_new_table(net, cfg->fc_table);
2993 }
2994 } else {
2995 table = fib6_new_table(net, cfg->fc_table);
2996 }
38308473
DM
2997
2998 if (!table)
c71099ac 2999 goto out;
c71099ac 3000
93531c67
DA
3001 err = -ENOMEM;
3002 rt = fib6_info_alloc(gfp_flags);
3003 if (!rt)
1da177e4 3004 goto out;
93531c67
DA
3005
3006 if (cfg->fc_flags & RTF_ADDRCONF)
3007 rt->dst_nocount = true;
1da177e4 3008
d4ead6b3
DA
3009 err = ip6_convert_metrics(net, rt, cfg);
3010 if (err < 0)
1da177e4 3011 goto out;
1da177e4 3012
1716a961 3013 if (cfg->fc_flags & RTF_EXPIRES)
14895687 3014 fib6_set_expires(rt, jiffies +
1716a961
G
3015 clock_t_to_jiffies(cfg->fc_expires));
3016 else
14895687 3017 fib6_clean_expires(rt);
1da177e4 3018
86872cb5
TG
3019 if (cfg->fc_protocol == RTPROT_UNSPEC)
3020 cfg->fc_protocol = RTPROT_BOOT;
93c2fb25 3021 rt->fib6_protocol = cfg->fc_protocol;
86872cb5
TG
3022
3023 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4 3024
19e42e45
RP
3025 if (cfg->fc_encap) {
3026 struct lwtunnel_state *lwtstate;
3027
30357d7d 3028 err = lwtunnel_build_state(cfg->fc_encap_type,
127eb7cd 3029 cfg->fc_encap, AF_INET6, cfg,
9ae28727 3030 &lwtstate, extack);
19e42e45
RP
3031 if (err)
3032 goto out;
5e670d84 3033 rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
19e42e45
RP
3034 }
3035
93c2fb25
DA
3036 ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3037 rt->fib6_dst.plen = cfg->fc_dst_len;
3038 if (rt->fib6_dst.plen == 128)
3b6761d1 3039 rt->dst_host = true;
e5fd387a 3040
1da177e4 3041#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
3042 ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3043 rt->fib6_src.plen = cfg->fc_src_len;
1da177e4
LT
3044#endif
3045
93c2fb25 3046 rt->fib6_metric = cfg->fc_metric;
5e670d84 3047 rt->fib6_nh.nh_weight = 1;
1da177e4 3048
e8478e80 3049 rt->fib6_type = cfg->fc_type;
1da177e4
LT
3050
3051 /* We cannot add true routes via loopback here,
3052 they would result in kernel looping; promote them to reject routes
3053 */
86872cb5 3054 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
3055 (dev && (dev->flags & IFF_LOOPBACK) &&
3056 !(addr_type & IPV6_ADDR_LOOPBACK) &&
3057 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 3058 /* hold loopback dev/idev if we haven't done so. */
5578689a 3059 if (dev != net->loopback_dev) {
1da177e4
LT
3060 if (dev) {
3061 dev_put(dev);
3062 in6_dev_put(idev);
3063 }
5578689a 3064 dev = net->loopback_dev;
1da177e4
LT
3065 dev_hold(dev);
3066 idev = in6_dev_get(dev);
3067 if (!idev) {
3068 err = -ENODEV;
3069 goto out;
3070 }
3071 }
93c2fb25 3072 rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP;
1da177e4
LT
3073 goto install_route;
3074 }
3075
86872cb5 3076 if (cfg->fc_flags & RTF_GATEWAY) {
9fbb704c
DA
3077 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3078 if (err)
48ed7b26 3079 goto out;
1da177e4 3080
93531c67 3081 rt->fib6_nh.nh_gw = cfg->fc_gateway;
1da177e4
LT
3082 }
3083
3084 err = -ENODEV;
38308473 3085 if (!dev)
1da177e4
LT
3086 goto out;
3087
428604fb
LB
3088 if (idev->cnf.disable_ipv6) {
3089 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3090 err = -EACCES;
3091 goto out;
3092 }
3093
955ec4cb
DA
3094 if (!(dev->flags & IFF_UP)) {
3095 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3096 err = -ENETDOWN;
3097 goto out;
3098 }
3099
c3968a85
DW
3100 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3101 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 3102 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
3103 err = -EINVAL;
3104 goto out;
3105 }
93c2fb25
DA
3106 rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3107 rt->fib6_prefsrc.plen = 128;
c3968a85 3108 } else
93c2fb25 3109 rt->fib6_prefsrc.plen = 0;
c3968a85 3110
93c2fb25 3111 rt->fib6_flags = cfg->fc_flags;
1da177e4
LT
3112
3113install_route:
93c2fb25 3114 if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
5609b80a 3115 !netif_carrier_ok(dev))
5e670d84
DA
3116 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3117 rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
93531c67 3118 rt->fib6_nh.nh_dev = dev;
93c2fb25 3119 rt->fib6_table = table;
63152fc0 3120
dcd1f572
DA
3121 if (idev)
3122 in6_dev_put(idev);
3123
8c5b83f0 3124 return rt;
6b9ea5a6
RP
3125out:
3126 if (dev)
3127 dev_put(dev);
3128 if (idev)
3129 in6_dev_put(idev);
6b9ea5a6 3130
93531c67 3131 fib6_info_release(rt);
8c5b83f0 3132 return ERR_PTR(err);
6b9ea5a6
RP
3133}
3134
acb54e3c 3135int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
333c4301 3136 struct netlink_ext_ack *extack)
6b9ea5a6 3137{
8d1c802b 3138 struct fib6_info *rt;
6b9ea5a6
RP
3139 int err;
3140
acb54e3c 3141 rt = ip6_route_info_create(cfg, gfp_flags, extack);
d4ead6b3
DA
3142 if (IS_ERR(rt))
3143 return PTR_ERR(rt);
6b9ea5a6 3144
d4ead6b3 3145 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
93531c67 3146 fib6_info_release(rt);
6b9ea5a6 3147
1da177e4
LT
3148 return err;
3149}
3150
8d1c802b 3151static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
1da177e4 3152{
afb1d4b5 3153 struct net *net = info->nl_net;
c71099ac 3154 struct fib6_table *table;
afb1d4b5 3155 int err;
1da177e4 3156
421842ed 3157 if (rt == net->ipv6.fib6_null_entry) {
6825a26c
G
3158 err = -ENOENT;
3159 goto out;
3160 }
6c813a72 3161
93c2fb25 3162 table = rt->fib6_table;
66f5d6ce 3163 spin_lock_bh(&table->tb6_lock);
86872cb5 3164 err = fib6_del(rt, info);
66f5d6ce 3165 spin_unlock_bh(&table->tb6_lock);
1da177e4 3166
6825a26c 3167out:
93531c67 3168 fib6_info_release(rt);
1da177e4
LT
3169 return err;
3170}
3171
8d1c802b 3172int ip6_del_rt(struct net *net, struct fib6_info *rt)
e0a1ad73 3173{
afb1d4b5
DA
3174 struct nl_info info = { .nl_net = net };
3175
528c4ceb 3176 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
3177}
3178
8d1c802b 3179static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
0ae81335
DA
3180{
3181 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 3182 struct net *net = info->nl_net;
16a16cd3 3183 struct sk_buff *skb = NULL;
0ae81335 3184 struct fib6_table *table;
e3330039 3185 int err = -ENOENT;
0ae81335 3186
421842ed 3187 if (rt == net->ipv6.fib6_null_entry)
e3330039 3188 goto out_put;
93c2fb25 3189 table = rt->fib6_table;
66f5d6ce 3190 spin_lock_bh(&table->tb6_lock);
0ae81335 3191
93c2fb25 3192 if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
8d1c802b 3193 struct fib6_info *sibling, *next_sibling;
0ae81335 3194
16a16cd3
DA
3195 /* prefer to send a single notification with all hops */
3196 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3197 if (skb) {
3198 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3199
d4ead6b3 3200 if (rt6_fill_node(net, skb, rt, NULL,
16a16cd3
DA
3201 NULL, NULL, 0, RTM_DELROUTE,
3202 info->portid, seq, 0) < 0) {
3203 kfree_skb(skb);
3204 skb = NULL;
3205 } else
3206 info->skip_notify = 1;
3207 }
3208
0ae81335 3209 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25
DA
3210 &rt->fib6_siblings,
3211 fib6_siblings) {
0ae81335
DA
3212 err = fib6_del(sibling, info);
3213 if (err)
e3330039 3214 goto out_unlock;
0ae81335
DA
3215 }
3216 }
3217
3218 err = fib6_del(rt, info);
e3330039 3219out_unlock:
66f5d6ce 3220 spin_unlock_bh(&table->tb6_lock);
e3330039 3221out_put:
93531c67 3222 fib6_info_release(rt);
16a16cd3
DA
3223
3224 if (skb) {
e3330039 3225 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
3226 info->nlh, gfp_any());
3227 }
0ae81335
DA
3228 return err;
3229}
3230
23fb93a4
DA
3231static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3232{
3233 int rc = -ESRCH;
3234
3235 if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3236 goto out;
3237
3238 if (cfg->fc_flags & RTF_GATEWAY &&
3239 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3240 goto out;
3241 if (dst_hold_safe(&rt->dst))
3242 rc = rt6_remove_exception_rt(rt);
3243out:
3244 return rc;
3245}
3246
333c4301
DA
3247static int ip6_route_del(struct fib6_config *cfg,
3248 struct netlink_ext_ack *extack)
1da177e4 3249{
8d1c802b 3250 struct rt6_info *rt_cache;
c71099ac 3251 struct fib6_table *table;
8d1c802b 3252 struct fib6_info *rt;
1da177e4 3253 struct fib6_node *fn;
1da177e4
LT
3254 int err = -ESRCH;
3255
5578689a 3256 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
3257 if (!table) {
3258 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 3259 return err;
d5d531cb 3260 }
c71099ac 3261
66f5d6ce 3262 rcu_read_lock();
1da177e4 3263
c71099ac 3264 fn = fib6_locate(&table->tb6_root,
86872cb5 3265 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 3266 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 3267 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 3268
1da177e4 3269 if (fn) {
66f5d6ce 3270 for_each_fib6_node_rt_rcu(fn) {
2b760fcf 3271 if (cfg->fc_flags & RTF_CACHE) {
23fb93a4
DA
3272 int rc;
3273
2b760fcf
WW
3274 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3275 &cfg->fc_src);
23fb93a4
DA
3276 if (rt_cache) {
3277 rc = ip6_del_cached_rt(rt_cache, cfg);
9e575010
ED
3278 if (rc != -ESRCH) {
3279 rcu_read_unlock();
23fb93a4 3280 return rc;
9e575010 3281 }
23fb93a4
DA
3282 }
3283 continue;
2b760fcf 3284 }
86872cb5 3285 if (cfg->fc_ifindex &&
5e670d84
DA
3286 (!rt->fib6_nh.nh_dev ||
3287 rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
1da177e4 3288 continue;
86872cb5 3289 if (cfg->fc_flags & RTF_GATEWAY &&
5e670d84 3290 !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
1da177e4 3291 continue;
93c2fb25 3292 if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
1da177e4 3293 continue;
93c2fb25 3294 if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
c2ed1880 3295 continue;
e873e4b9
WW
3296 if (!fib6_info_hold_safe(rt))
3297 continue;
66f5d6ce 3298 rcu_read_unlock();
1da177e4 3299
0ae81335
DA
3300 /* if gateway was specified only delete the one hop */
3301 if (cfg->fc_flags & RTF_GATEWAY)
3302 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3303
3304 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3305 }
3306 }
66f5d6ce 3307 rcu_read_unlock();
1da177e4
LT
3308
3309 return err;
3310}
3311
6700c270 3312static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 3313{
a6279458 3314 struct netevent_redirect netevent;
e8599ff4 3315 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
3316 struct ndisc_options ndopts;
3317 struct inet6_dev *in6_dev;
3318 struct neighbour *neigh;
a68886a6 3319 struct fib6_info *from;
71bcdba0 3320 struct rd_msg *msg;
6e157b6a
DM
3321 int optlen, on_link;
3322 u8 *lladdr;
e8599ff4 3323
29a3cad5 3324 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 3325 optlen -= sizeof(*msg);
e8599ff4
DM
3326
3327 if (optlen < 0) {
6e157b6a 3328 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
3329 return;
3330 }
3331
71bcdba0 3332 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 3333
71bcdba0 3334 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 3335 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
3336 return;
3337 }
3338
6e157b6a 3339 on_link = 0;
71bcdba0 3340 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 3341 on_link = 1;
71bcdba0 3342 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 3343 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 3344 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
3345 return;
3346 }
3347
3348 in6_dev = __in6_dev_get(skb->dev);
3349 if (!in6_dev)
3350 return;
3351 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3352 return;
3353
3354 /* RFC2461 8.1:
3355 * The IP source address of the Redirect MUST be the same as the current
3356 * first-hop router for the specified ICMP Destination Address.
3357 */
3358
f997c55c 3359 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
3360 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3361 return;
3362 }
6e157b6a
DM
3363
3364 lladdr = NULL;
e8599ff4
DM
3365 if (ndopts.nd_opts_tgt_lladdr) {
3366 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3367 skb->dev);
3368 if (!lladdr) {
3369 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3370 return;
3371 }
3372 }
3373
6e157b6a 3374 rt = (struct rt6_info *) dst;
ec13ad1d 3375 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 3376 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 3377 return;
6e157b6a 3378 }
e8599ff4 3379
6e157b6a
DM
3380 /* Redirect received -> path was valid.
3381 * Look, redirects are sent only in response to data packets,
3382 * so that this nexthop apparently is reachable. --ANK
3383 */
0dec879f 3384 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 3385
71bcdba0 3386 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
3387 if (!neigh)
3388 return;
a6279458 3389
1da177e4
LT
3390 /*
3391 * We have finally decided to accept it.
3392 */
3393
f997c55c 3394 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3395 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3396 NEIGH_UPDATE_F_OVERRIDE|
3397 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3398 NEIGH_UPDATE_F_ISROUTER)),
3399 NDISC_REDIRECT, &ndopts);
1da177e4 3400
4d85cd0c 3401 rcu_read_lock();
a68886a6 3402 from = rcu_dereference(rt->from);
e873e4b9
WW
3403 /* This fib6_info_hold() is safe here because we hold reference to rt
3404 * and rt already holds reference to fib6_info.
3405 */
8a14e46f 3406 fib6_info_hold(from);
4d85cd0c 3407 rcu_read_unlock();
8a14e46f
DA
3408
3409 nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
38308473 3410 if (!nrt)
1da177e4
LT
3411 goto out;
3412
3413 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3414 if (on_link)
3415 nrt->rt6i_flags &= ~RTF_GATEWAY;
3416
4e3fd7a0 3417 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3418
2b760fcf
WW
3419 /* No need to remove rt from the exception table if rt is
3420 * a cached route because rt6_insert_exception() will
3421 * takes care of it
3422 */
8a14e46f 3423 if (rt6_insert_exception(nrt, from)) {
2b760fcf
WW
3424 dst_release_immediate(&nrt->dst);
3425 goto out;
3426 }
1da177e4 3427
d8d1f30b
CG
3428 netevent.old = &rt->dst;
3429 netevent.new = &nrt->dst;
71bcdba0 3430 netevent.daddr = &msg->dest;
60592833 3431 netevent.neigh = neigh;
8d71740c
TT
3432 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3433
1da177e4 3434out:
8a14e46f 3435 fib6_info_release(from);
e8599ff4 3436 neigh_release(neigh);
6e157b6a
DM
3437}
3438
70ceb4f5 3439#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 3440static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 3441 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3442 const struct in6_addr *gwaddr,
3443 struct net_device *dev)
70ceb4f5 3444{
830218c1
DA
3445 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3446 int ifindex = dev->ifindex;
70ceb4f5 3447 struct fib6_node *fn;
8d1c802b 3448 struct fib6_info *rt = NULL;
c71099ac
TG
3449 struct fib6_table *table;
3450
830218c1 3451 table = fib6_get_table(net, tb_id);
38308473 3452 if (!table)
c71099ac 3453 return NULL;
70ceb4f5 3454
66f5d6ce 3455 rcu_read_lock();
38fbeeee 3456 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3457 if (!fn)
3458 goto out;
3459
66f5d6ce 3460 for_each_fib6_node_rt_rcu(fn) {
5e670d84 3461 if (rt->fib6_nh.nh_dev->ifindex != ifindex)
70ceb4f5 3462 continue;
93c2fb25 3463 if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
70ceb4f5 3464 continue;
5e670d84 3465 if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
70ceb4f5 3466 continue;
e873e4b9
WW
3467 if (!fib6_info_hold_safe(rt))
3468 continue;
70ceb4f5
YH
3469 break;
3470 }
3471out:
66f5d6ce 3472 rcu_read_unlock();
70ceb4f5
YH
3473 return rt;
3474}
3475
8d1c802b 3476static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 3477 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3478 const struct in6_addr *gwaddr,
3479 struct net_device *dev,
95c96174 3480 unsigned int pref)
70ceb4f5 3481{
86872cb5 3482 struct fib6_config cfg = {
238fc7ea 3483 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3484 .fc_ifindex = dev->ifindex,
86872cb5
TG
3485 .fc_dst_len = prefixlen,
3486 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3487 RTF_UP | RTF_PREF(pref),
b91d5329 3488 .fc_protocol = RTPROT_RA,
e8478e80 3489 .fc_type = RTN_UNICAST,
15e47304 3490 .fc_nlinfo.portid = 0,
efa2cea0
DL
3491 .fc_nlinfo.nlh = NULL,
3492 .fc_nlinfo.nl_net = net,
86872cb5
TG
3493 };
3494
830218c1 3495 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3496 cfg.fc_dst = *prefix;
3497 cfg.fc_gateway = *gwaddr;
70ceb4f5 3498
e317da96
YH
3499 /* We should treat it as a default route if prefix length is 0. */
3500 if (!prefixlen)
86872cb5 3501 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3502
acb54e3c 3503 ip6_route_add(&cfg, GFP_ATOMIC, NULL);
70ceb4f5 3504
830218c1 3505 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3506}
3507#endif
3508
8d1c802b 3509struct fib6_info *rt6_get_dflt_router(struct net *net,
afb1d4b5
DA
3510 const struct in6_addr *addr,
3511 struct net_device *dev)
1ab1457c 3512{
830218c1 3513 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
8d1c802b 3514 struct fib6_info *rt;
c71099ac 3515 struct fib6_table *table;
1da177e4 3516
afb1d4b5 3517 table = fib6_get_table(net, tb_id);
38308473 3518 if (!table)
c71099ac 3519 return NULL;
1da177e4 3520
66f5d6ce
WW
3521 rcu_read_lock();
3522 for_each_fib6_node_rt_rcu(&table->tb6_root) {
5e670d84 3523 if (dev == rt->fib6_nh.nh_dev &&
93c2fb25 3524 ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
5e670d84 3525 ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
1da177e4
LT
3526 break;
3527 }
e873e4b9
WW
3528 if (rt && !fib6_info_hold_safe(rt))
3529 rt = NULL;
66f5d6ce 3530 rcu_read_unlock();
1da177e4
LT
3531 return rt;
3532}
3533
8d1c802b 3534struct fib6_info *rt6_add_dflt_router(struct net *net,
afb1d4b5 3535 const struct in6_addr *gwaddr,
ebacaaa0
YH
3536 struct net_device *dev,
3537 unsigned int pref)
1da177e4 3538{
86872cb5 3539 struct fib6_config cfg = {
ca254490 3540 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3541 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3542 .fc_ifindex = dev->ifindex,
3543 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3544 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3545 .fc_protocol = RTPROT_RA,
e8478e80 3546 .fc_type = RTN_UNICAST,
15e47304 3547 .fc_nlinfo.portid = 0,
5578689a 3548 .fc_nlinfo.nlh = NULL,
afb1d4b5 3549 .fc_nlinfo.nl_net = net,
86872cb5 3550 };
1da177e4 3551
4e3fd7a0 3552 cfg.fc_gateway = *gwaddr;
1da177e4 3553
acb54e3c 3554 if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
830218c1
DA
3555 struct fib6_table *table;
3556
3557 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3558 if (table)
3559 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3560 }
1da177e4 3561
afb1d4b5 3562 return rt6_get_dflt_router(net, gwaddr, dev);
1da177e4
LT
3563}
3564
afb1d4b5
DA
3565static void __rt6_purge_dflt_routers(struct net *net,
3566 struct fib6_table *table)
1da177e4 3567{
8d1c802b 3568 struct fib6_info *rt;
1da177e4
LT
3569
3570restart:
66f5d6ce
WW
3571 rcu_read_lock();
3572 for_each_fib6_node_rt_rcu(&table->tb6_root) {
dcd1f572
DA
3573 struct net_device *dev = fib6_info_nh_dev(rt);
3574 struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3575
93c2fb25 3576 if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
e873e4b9
WW
3577 (!idev || idev->cnf.accept_ra != 2) &&
3578 fib6_info_hold_safe(rt)) {
93531c67
DA
3579 rcu_read_unlock();
3580 ip6_del_rt(net, rt);
1da177e4
LT
3581 goto restart;
3582 }
3583 }
66f5d6ce 3584 rcu_read_unlock();
830218c1
DA
3585
3586 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3587}
3588
3589void rt6_purge_dflt_routers(struct net *net)
3590{
3591 struct fib6_table *table;
3592 struct hlist_head *head;
3593 unsigned int h;
3594
3595 rcu_read_lock();
3596
3597 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3598 head = &net->ipv6.fib_table_hash[h];
3599 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3600 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
afb1d4b5 3601 __rt6_purge_dflt_routers(net, table);
830218c1
DA
3602 }
3603 }
3604
3605 rcu_read_unlock();
1da177e4
LT
3606}
3607
5578689a
DL
3608static void rtmsg_to_fib6_config(struct net *net,
3609 struct in6_rtmsg *rtmsg,
86872cb5
TG
3610 struct fib6_config *cfg)
3611{
3612 memset(cfg, 0, sizeof(*cfg));
3613
ca254490
DA
3614 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3615 : RT6_TABLE_MAIN;
86872cb5
TG
3616 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3617 cfg->fc_metric = rtmsg->rtmsg_metric;
3618 cfg->fc_expires = rtmsg->rtmsg_info;
3619 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3620 cfg->fc_src_len = rtmsg->rtmsg_src_len;
3621 cfg->fc_flags = rtmsg->rtmsg_flags;
e8478e80 3622 cfg->fc_type = rtmsg->rtmsg_type;
86872cb5 3623
5578689a 3624 cfg->fc_nlinfo.nl_net = net;
f1243c2d 3625
4e3fd7a0
AD
3626 cfg->fc_dst = rtmsg->rtmsg_dst;
3627 cfg->fc_src = rtmsg->rtmsg_src;
3628 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
3629}
3630
5578689a 3631int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3632{
86872cb5 3633 struct fib6_config cfg;
1da177e4
LT
3634 struct in6_rtmsg rtmsg;
3635 int err;
3636
67ba4152 3637 switch (cmd) {
1da177e4
LT
3638 case SIOCADDRT: /* Add a route */
3639 case SIOCDELRT: /* Delete a route */
af31f412 3640 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3641 return -EPERM;
3642 err = copy_from_user(&rtmsg, arg,
3643 sizeof(struct in6_rtmsg));
3644 if (err)
3645 return -EFAULT;
86872cb5 3646
5578689a 3647 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3648
1da177e4
LT
3649 rtnl_lock();
3650 switch (cmd) {
3651 case SIOCADDRT:
acb54e3c 3652 err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
1da177e4
LT
3653 break;
3654 case SIOCDELRT:
333c4301 3655 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3656 break;
3657 default:
3658 err = -EINVAL;
3659 }
3660 rtnl_unlock();
3661
3662 return err;
3ff50b79 3663 }
1da177e4
LT
3664
3665 return -EINVAL;
3666}
3667
3668/*
3669 * Drop the packet on the floor
3670 */
3671
d5fdd6ba 3672static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3673{
612f09e8 3674 int type;
adf30907 3675 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3676 switch (ipstats_mib_noroutes) {
3677 case IPSTATS_MIB_INNOROUTES:
0660e03f 3678 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3679 if (type == IPV6_ADDR_ANY) {
bdb7cc64
SS
3680 IP6_INC_STATS(dev_net(dst->dev),
3681 __in6_dev_get_safely(skb->dev),
3bd653c8 3682 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3683 break;
3684 }
3685 /* FALLTHROUGH */
3686 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3687 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3688 ipstats_mib_noroutes);
612f09e8
YH
3689 break;
3690 }
3ffe533c 3691 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3692 kfree_skb(skb);
3693 return 0;
3694}
3695
9ce8ade0
TG
3696static int ip6_pkt_discard(struct sk_buff *skb)
3697{
612f09e8 3698 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3699}
3700
ede2059d 3701static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3702{
adf30907 3703 skb->dev = skb_dst(skb)->dev;
612f09e8 3704 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3705}
3706
9ce8ade0
TG
3707static int ip6_pkt_prohibit(struct sk_buff *skb)
3708{
612f09e8 3709 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3710}
3711
ede2059d 3712static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3713{
adf30907 3714 skb->dev = skb_dst(skb)->dev;
612f09e8 3715 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3716}
3717
1da177e4
LT
3718/*
3719 * Allocate a dst for local (unicast / anycast) address.
3720 */
3721
360a9887
DA
3722struct fib6_info *addrconf_f6i_alloc(struct net *net,
3723 struct inet6_dev *idev,
3724 const struct in6_addr *addr,
3725 bool anycast, gfp_t gfp_flags)
1da177e4 3726{
ca254490 3727 u32 tb_id;
4832c30d 3728 struct net_device *dev = idev->dev;
360a9887 3729 struct fib6_info *f6i;
5f02ce24 3730
360a9887
DA
3731 f6i = fib6_info_alloc(gfp_flags);
3732 if (!f6i)
1da177e4
LT
3733 return ERR_PTR(-ENOMEM);
3734
360a9887 3735 f6i->dst_nocount = true;
360a9887
DA
3736 f6i->dst_host = true;
3737 f6i->fib6_protocol = RTPROT_KERNEL;
3738 f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP;
e8478e80 3739 if (anycast) {
360a9887
DA
3740 f6i->fib6_type = RTN_ANYCAST;
3741 f6i->fib6_flags |= RTF_ANYCAST;
e8478e80 3742 } else {
360a9887
DA
3743 f6i->fib6_type = RTN_LOCAL;
3744 f6i->fib6_flags |= RTF_LOCAL;
e8478e80 3745 }
1da177e4 3746
360a9887 3747 f6i->fib6_nh.nh_gw = *addr;
93531c67 3748 dev_hold(dev);
360a9887
DA
3749 f6i->fib6_nh.nh_dev = dev;
3750 f6i->fib6_dst.addr = *addr;
3751 f6i->fib6_dst.plen = 128;
ca254490 3752 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
360a9887 3753 f6i->fib6_table = fib6_get_table(net, tb_id);
1da177e4 3754
360a9887 3755 return f6i;
1da177e4
LT
3756}
3757
/*
 * remove deleted ip from prefsrc entries
 *
 * Argument bundle handed to fib6_remove_prefsrc() through
 * fib6_clean_all().
 */
struct arg_dev_net_ip {
	struct net_device *dev;	/* device to match; NULL matches any device */
	struct net *net;	/* namespace whose null entry is skipped */
	struct in6_addr *addr;	/* address being removed */
};
3764
8d1c802b 3765static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
c3968a85
DW
3766{
3767 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3768 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3769 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3770
5e670d84 3771 if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
421842ed 3772 rt != net->ipv6.fib6_null_entry &&
93c2fb25 3773 ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
60006a48 3774 spin_lock_bh(&rt6_exception_lock);
c3968a85 3775 /* remove prefsrc entry */
93c2fb25 3776 rt->fib6_prefsrc.plen = 0;
60006a48 3777 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3778 }
3779 return 0;
3780}
3781
3782void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3783{
3784 struct net *net = dev_net(ifp->idev->dev);
3785 struct arg_dev_net_ip adni = {
3786 .dev = ifp->idev->dev,
3787 .net = net,
3788 .addr = &ifp->addr,
3789 };
0c3584d5 3790 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3791}
3792
be7a010d 3793#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
be7a010d
DJ
3794
3795/* Remove routers and update dst entries when gateway turn into host. */
8d1c802b 3796static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
be7a010d
DJ
3797{
3798 struct in6_addr *gateway = (struct in6_addr *)arg;
3799
93c2fb25 3800 if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
5e670d84 3801 ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
be7a010d
DJ
3802 return -1;
3803 }
b16cb459
WW
3804
3805 /* Further clean up cached routes in exception table.
3806 * This is needed because cached route may have a different
3807 * gateway than its 'parent' in the case of an ip redirect.
3808 */
3809 rt6_exceptions_clean_tohost(rt, gateway);
3810
be7a010d
DJ
3811 return 0;
3812}
3813
3814void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3815{
3816 fib6_clean_all(net, fib6_clean_tohost, gateway);
3817}
3818
/*
 * Argument bundle for the fib6_ifup()/fib6_ifdown() walkers.  The union
 * member in use depends on the walker: fib6_ifup() reads nh_flags,
 * fib6_ifdown() reads event.
 */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event is about */
	union {
		unsigned int nh_flags;	/* nexthop flags to clear on up */
		unsigned long event;	/* NETDEV_* event on down/change */
	};
};
3826
8d1c802b 3827static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
d7dedee1 3828{
8d1c802b 3829 struct fib6_info *iter;
d7dedee1
IS
3830 struct fib6_node *fn;
3831
93c2fb25
DA
3832 fn = rcu_dereference_protected(rt->fib6_node,
3833 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3834 iter = rcu_dereference_protected(fn->leaf,
93c2fb25 3835 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3836 while (iter) {
93c2fb25 3837 if (iter->fib6_metric == rt->fib6_metric &&
33bd5ac5 3838 rt6_qualify_for_ecmp(iter))
d7dedee1 3839 return iter;
8fb11a9a 3840 iter = rcu_dereference_protected(iter->fib6_next,
93c2fb25 3841 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1
IS
3842 }
3843
3844 return NULL;
3845}
3846
8d1c802b 3847static bool rt6_is_dead(const struct fib6_info *rt)
d7dedee1 3848{
5e670d84
DA
3849 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
3850 (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
dcd1f572 3851 fib6_ignore_linkdown(rt)))
d7dedee1
IS
3852 return true;
3853
3854 return false;
3855}
3856
8d1c802b 3857static int rt6_multipath_total_weight(const struct fib6_info *rt)
d7dedee1 3858{
8d1c802b 3859 struct fib6_info *iter;
d7dedee1
IS
3860 int total = 0;
3861
3862 if (!rt6_is_dead(rt))
5e670d84 3863 total += rt->fib6_nh.nh_weight;
d7dedee1 3864
93c2fb25 3865 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
d7dedee1 3866 if (!rt6_is_dead(iter))
5e670d84 3867 total += iter->fib6_nh.nh_weight;
d7dedee1
IS
3868 }
3869
3870 return total;
3871}
3872
8d1c802b 3873static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
d7dedee1
IS
3874{
3875 int upper_bound = -1;
3876
3877 if (!rt6_is_dead(rt)) {
5e670d84 3878 *weight += rt->fib6_nh.nh_weight;
d7dedee1
IS
3879 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3880 total) - 1;
3881 }
5e670d84 3882 atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
d7dedee1
IS
3883}
3884
8d1c802b 3885static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
d7dedee1 3886{
8d1c802b 3887 struct fib6_info *iter;
d7dedee1
IS
3888 int weight = 0;
3889
3890 rt6_upper_bound_set(rt, &weight, total);
3891
93c2fb25 3892 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
d7dedee1
IS
3893 rt6_upper_bound_set(iter, &weight, total);
3894}
3895
8d1c802b 3896void rt6_multipath_rebalance(struct fib6_info *rt)
d7dedee1 3897{
8d1c802b 3898 struct fib6_info *first;
d7dedee1
IS
3899 int total;
3900
3901 /* In case the entire multipath route was marked for flushing,
3902 * then there is no need to rebalance upon the removal of every
3903 * sibling route.
3904 */
93c2fb25 3905 if (!rt->fib6_nsiblings || rt->should_flush)
d7dedee1
IS
3906 return;
3907
3908 /* During lookup routes are evaluated in order, so we need to
3909 * make sure upper bounds are assigned from the first sibling
3910 * onwards.
3911 */
3912 first = rt6_multipath_first_sibling(rt);
3913 if (WARN_ON_ONCE(!first))
3914 return;
3915
3916 total = rt6_multipath_total_weight(first);
3917 rt6_multipath_upper_bound_set(first, total);
3918}
3919
8d1c802b 3920static int fib6_ifup(struct fib6_info *rt, void *p_arg)
2127d95a
IS
3921{
3922 const struct arg_netdev_event *arg = p_arg;
7aef6859 3923 struct net *net = dev_net(arg->dev);
2127d95a 3924
421842ed 3925 if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
5e670d84 3926 rt->fib6_nh.nh_flags &= ~arg->nh_flags;
7aef6859 3927 fib6_update_sernum_upto_root(net, rt);
d7dedee1 3928 rt6_multipath_rebalance(rt);
1de178ed 3929 }
2127d95a
IS
3930
3931 return 0;
3932}
3933
3934void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3935{
3936 struct arg_netdev_event arg = {
3937 .dev = dev,
6802f3ad
IS
3938 {
3939 .nh_flags = nh_flags,
3940 },
2127d95a
IS
3941 };
3942
3943 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3944 arg.nh_flags |= RTNH_F_LINKDOWN;
3945
3946 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3947}
3948
8d1c802b 3949static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
1de178ed
IS
3950 const struct net_device *dev)
3951{
8d1c802b 3952 struct fib6_info *iter;
1de178ed 3953
5e670d84 3954 if (rt->fib6_nh.nh_dev == dev)
1de178ed 3955 return true;
93c2fb25 3956 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84 3957 if (iter->fib6_nh.nh_dev == dev)
1de178ed
IS
3958 return true;
3959
3960 return false;
3961}
3962
8d1c802b 3963static void rt6_multipath_flush(struct fib6_info *rt)
1de178ed 3964{
8d1c802b 3965 struct fib6_info *iter;
1de178ed
IS
3966
3967 rt->should_flush = 1;
93c2fb25 3968 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
1de178ed
IS
3969 iter->should_flush = 1;
3970}
3971
8d1c802b 3972static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
1de178ed
IS
3973 const struct net_device *down_dev)
3974{
8d1c802b 3975 struct fib6_info *iter;
1de178ed
IS
3976 unsigned int dead = 0;
3977
5e670d84
DA
3978 if (rt->fib6_nh.nh_dev == down_dev ||
3979 rt->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed 3980 dead++;
93c2fb25 3981 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84
DA
3982 if (iter->fib6_nh.nh_dev == down_dev ||
3983 iter->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed
IS
3984 dead++;
3985
3986 return dead;
3987}
3988
8d1c802b 3989static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
1de178ed
IS
3990 const struct net_device *dev,
3991 unsigned int nh_flags)
3992{
8d1c802b 3993 struct fib6_info *iter;
1de178ed 3994
5e670d84
DA
3995 if (rt->fib6_nh.nh_dev == dev)
3996 rt->fib6_nh.nh_flags |= nh_flags;
93c2fb25 3997 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84
DA
3998 if (iter->fib6_nh.nh_dev == dev)
3999 iter->fib6_nh.nh_flags |= nh_flags;
1de178ed
IS
4000}
4001
a1a22c12 4002/* called with write lock held for table with rt */
8d1c802b 4003static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
1da177e4 4004{
4c981e28
IS
4005 const struct arg_netdev_event *arg = p_arg;
4006 const struct net_device *dev = arg->dev;
7aef6859 4007 struct net *net = dev_net(dev);
8ed67789 4008
421842ed 4009 if (rt == net->ipv6.fib6_null_entry)
27c6fa73
IS
4010 return 0;
4011
4012 switch (arg->event) {
4013 case NETDEV_UNREGISTER:
5e670d84 4014 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
27c6fa73 4015 case NETDEV_DOWN:
1de178ed 4016 if (rt->should_flush)
27c6fa73 4017 return -1;
93c2fb25 4018 if (!rt->fib6_nsiblings)
5e670d84 4019 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
1de178ed
IS
4020 if (rt6_multipath_uses_dev(rt, dev)) {
4021 unsigned int count;
4022
4023 count = rt6_multipath_dead_count(rt, dev);
93c2fb25 4024 if (rt->fib6_nsiblings + 1 == count) {
1de178ed
IS
4025 rt6_multipath_flush(rt);
4026 return -1;
4027 }
4028 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4029 RTNH_F_LINKDOWN);
7aef6859 4030 fib6_update_sernum(net, rt);
d7dedee1 4031 rt6_multipath_rebalance(rt);
1de178ed
IS
4032 }
4033 return -2;
27c6fa73 4034 case NETDEV_CHANGE:
5e670d84 4035 if (rt->fib6_nh.nh_dev != dev ||
93c2fb25 4036 rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73 4037 break;
5e670d84 4038 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 4039 rt6_multipath_rebalance(rt);
27c6fa73 4040 break;
2b241361 4041 }
c159d30c 4042
1da177e4
LT
4043 return 0;
4044}
4045
27c6fa73 4046void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 4047{
4c981e28 4048 struct arg_netdev_event arg = {
8ed67789 4049 .dev = dev,
6802f3ad
IS
4050 {
4051 .event = event,
4052 },
8ed67789
DL
4053 };
4054
4c981e28
IS
4055 fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
4056}
4057
4058void rt6_disable_ip(struct net_device *dev, unsigned long event)
4059{
4060 rt6_sync_down_dev(dev, event);
4061 rt6_uncached_list_flush_dev(dev_net(dev), dev);
4062 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
4063}
4064
/* Argument handed to rt6_mtu_change_route() through fib6_clean_all(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
4069
8d1c802b 4070static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4071{
4072 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4073 struct inet6_dev *idev;
4074
4075 /* In IPv6 pmtu discovery is not optional,
4076 so that RTAX_MTU lock cannot disable it.
4077 We still use this lock to block changes
4078 caused by addrconf/ndisc.
4079 */
4080
4081 idev = __in6_dev_get(arg->dev);
38308473 4082 if (!idev)
1da177e4
LT
4083 return 0;
4084
4085 /* For administrative MTU increase, there is no way to discover
4086 IPv6 PMTU increase, so PMTU increase should be updated here.
4087 Since RFC 1981 doesn't include administrative MTU increase
4088 update PMTU increase is a MUST. (i.e. jumbo frame)
4089 */
5e670d84 4090 if (rt->fib6_nh.nh_dev == arg->dev &&
d4ead6b3
DA
4091 !fib6_metric_locked(rt, RTAX_MTU)) {
4092 u32 mtu = rt->fib6_pmtu;
4093
4094 if (mtu >= arg->mtu ||
4095 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4096 fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4097
f5bbe7ee 4098 spin_lock_bh(&rt6_exception_lock);
e9fa1495 4099 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
f5bbe7ee 4100 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 4101 }
1da177e4
LT
4102 return 0;
4103}
4104
95c96174 4105void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 4106{
c71099ac
TG
4107 struct rt6_mtu_change_arg arg = {
4108 .dev = dev,
4109 .mtu = mtu,
4110 };
1da177e4 4111
0c3584d5 4112 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
4113}
4114
ef7c79ed 4115static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 4116 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
aa8f8778 4117 [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
86872cb5 4118 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 4119 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
4120 [RTA_PRIORITY] = { .type = NLA_U32 },
4121 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 4122 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 4123 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
4124 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4125 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 4126 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 4127 [RTA_UID] = { .type = NLA_U32 },
3b45a410 4128 [RTA_MARK] = { .type = NLA_U32 },
aa8f8778 4129 [RTA_TABLE] = { .type = NLA_U32 },
eacb9384
RP
4130 [RTA_IP_PROTO] = { .type = NLA_U8 },
4131 [RTA_SPORT] = { .type = NLA_U16 },
4132 [RTA_DPORT] = { .type = NLA_U16 },
86872cb5
TG
4133};
4134
4135static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
4136 struct fib6_config *cfg,
4137 struct netlink_ext_ack *extack)
1da177e4 4138{
86872cb5
TG
4139 struct rtmsg *rtm;
4140 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 4141 unsigned int pref;
86872cb5 4142 int err;
1da177e4 4143
fceb6435
JB
4144 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4145 NULL);
86872cb5
TG
4146 if (err < 0)
4147 goto errout;
1da177e4 4148
86872cb5
TG
4149 err = -EINVAL;
4150 rtm = nlmsg_data(nlh);
4151 memset(cfg, 0, sizeof(*cfg));
4152
4153 cfg->fc_table = rtm->rtm_table;
4154 cfg->fc_dst_len = rtm->rtm_dst_len;
4155 cfg->fc_src_len = rtm->rtm_src_len;
4156 cfg->fc_flags = RTF_UP;
4157 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 4158 cfg->fc_type = rtm->rtm_type;
86872cb5 4159
ef2c7d7b
ND
4160 if (rtm->rtm_type == RTN_UNREACHABLE ||
4161 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
4162 rtm->rtm_type == RTN_PROHIBIT ||
4163 rtm->rtm_type == RTN_THROW)
86872cb5
TG
4164 cfg->fc_flags |= RTF_REJECT;
4165
ab79ad14
4166 if (rtm->rtm_type == RTN_LOCAL)
4167 cfg->fc_flags |= RTF_LOCAL;
4168
1f56a01f
MKL
4169 if (rtm->rtm_flags & RTM_F_CLONED)
4170 cfg->fc_flags |= RTF_CACHE;
4171
fc1e64e1
DA
4172 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4173
15e47304 4174 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 4175 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 4176 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
4177
4178 if (tb[RTA_GATEWAY]) {
67b61f6c 4179 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 4180 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 4181 }
86872cb5
TG
4182
4183 if (tb[RTA_DST]) {
4184 int plen = (rtm->rtm_dst_len + 7) >> 3;
4185
4186 if (nla_len(tb[RTA_DST]) < plen)
4187 goto errout;
4188
4189 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 4190 }
86872cb5
TG
4191
4192 if (tb[RTA_SRC]) {
4193 int plen = (rtm->rtm_src_len + 7) >> 3;
4194
4195 if (nla_len(tb[RTA_SRC]) < plen)
4196 goto errout;
4197
4198 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 4199 }
86872cb5 4200
c3968a85 4201 if (tb[RTA_PREFSRC])
67b61f6c 4202 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 4203
86872cb5
TG
4204 if (tb[RTA_OIF])
4205 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4206
4207 if (tb[RTA_PRIORITY])
4208 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4209
4210 if (tb[RTA_METRICS]) {
4211 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4212 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 4213 }
86872cb5
TG
4214
4215 if (tb[RTA_TABLE])
4216 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4217
51ebd318
ND
4218 if (tb[RTA_MULTIPATH]) {
4219 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4220 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
4221
4222 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 4223 cfg->fc_mp_len, extack);
9ed59592
DA
4224 if (err < 0)
4225 goto errout;
51ebd318
ND
4226 }
4227
c78ba6d6
LR
4228 if (tb[RTA_PREF]) {
4229 pref = nla_get_u8(tb[RTA_PREF]);
4230 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4231 pref != ICMPV6_ROUTER_PREF_HIGH)
4232 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4233 cfg->fc_flags |= RTF_PREF(pref);
4234 }
4235
19e42e45
RP
4236 if (tb[RTA_ENCAP])
4237 cfg->fc_encap = tb[RTA_ENCAP];
4238
9ed59592 4239 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
4240 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4241
c255bd68 4242 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
4243 if (err < 0)
4244 goto errout;
4245 }
4246
32bc201e
XL
4247 if (tb[RTA_EXPIRES]) {
4248 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4249
4250 if (addrconf_finite_timeout(timeout)) {
4251 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4252 cfg->fc_flags |= RTF_EXPIRES;
4253 }
4254 }
4255
86872cb5
TG
4256 err = 0;
4257errout:
4258 return err;
1da177e4
LT
4259}
4260
6b9ea5a6 4261struct rt6_nh {
8d1c802b 4262 struct fib6_info *fib6_info;
6b9ea5a6 4263 struct fib6_config r_cfg;
6b9ea5a6
RP
4264 struct list_head next;
4265};
4266
4267static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4268{
4269 struct rt6_nh *nh;
4270
4271 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 4272 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
4273 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4274 nh->r_cfg.fc_ifindex);
4275 }
4276}
4277
d4ead6b3
DA
4278static int ip6_route_info_append(struct net *net,
4279 struct list_head *rt6_nh_list,
8d1c802b
DA
4280 struct fib6_info *rt,
4281 struct fib6_config *r_cfg)
6b9ea5a6
RP
4282{
4283 struct rt6_nh *nh;
6b9ea5a6
RP
4284 int err = -EEXIST;
4285
4286 list_for_each_entry(nh, rt6_nh_list, next) {
8d1c802b
DA
4287 /* check if fib6_info already exists */
4288 if (rt6_duplicate_nexthop(nh->fib6_info, rt))
6b9ea5a6
RP
4289 return err;
4290 }
4291
4292 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4293 if (!nh)
4294 return -ENOMEM;
8d1c802b 4295 nh->fib6_info = rt;
d4ead6b3 4296 err = ip6_convert_metrics(net, rt, r_cfg);
6b9ea5a6
RP
4297 if (err) {
4298 kfree(nh);
4299 return err;
4300 }
4301 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4302 list_add_tail(&nh->next, rt6_nh_list);
4303
4304 return 0;
4305}
4306
8d1c802b
DA
4307static void ip6_route_mpath_notify(struct fib6_info *rt,
4308 struct fib6_info *rt_last,
3b1137fe
DA
4309 struct nl_info *info,
4310 __u16 nlflags)
4311{
4312 /* if this is an APPEND route, then rt points to the first route
4313 * inserted and rt_last points to last route inserted. Userspace
4314 * wants a consistent dump of the route which starts at the first
4315 * nexthop. Since sibling routes are always added at the end of
4316 * the list, find the first sibling of the last route appended
4317 */
93c2fb25
DA
4318 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4319 rt = list_first_entry(&rt_last->fib6_siblings,
8d1c802b 4320 struct fib6_info,
93c2fb25 4321 fib6_siblings);
3b1137fe
DA
4322 }
4323
4324 if (rt)
4325 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4326}
4327
333c4301
DA
4328static int ip6_route_multipath_add(struct fib6_config *cfg,
4329 struct netlink_ext_ack *extack)
51ebd318 4330{
8d1c802b 4331 struct fib6_info *rt_notif = NULL, *rt_last = NULL;
3b1137fe 4332 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4333 struct fib6_config r_cfg;
4334 struct rtnexthop *rtnh;
8d1c802b 4335 struct fib6_info *rt;
6b9ea5a6
RP
4336 struct rt6_nh *err_nh;
4337 struct rt6_nh *nh, *nh_safe;
3b1137fe 4338 __u16 nlflags;
51ebd318
ND
4339 int remaining;
4340 int attrlen;
6b9ea5a6
RP
4341 int err = 1;
4342 int nhn = 0;
4343 int replace = (cfg->fc_nlinfo.nlh &&
4344 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4345 LIST_HEAD(rt6_nh_list);
51ebd318 4346
3b1137fe
DA
4347 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4348 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4349 nlflags |= NLM_F_APPEND;
4350
35f1b4e9 4351 remaining = cfg->fc_mp_len;
51ebd318 4352 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4353
6b9ea5a6 4354 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
8d1c802b 4355 * fib6_info structs per nexthop
6b9ea5a6 4356 */
51ebd318
ND
4357 while (rtnh_ok(rtnh, remaining)) {
4358 memcpy(&r_cfg, cfg, sizeof(*cfg));
4359 if (rtnh->rtnh_ifindex)
4360 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4361
4362 attrlen = rtnh_attrlen(rtnh);
4363 if (attrlen > 0) {
4364 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4365
4366 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4367 if (nla) {
67b61f6c 4368 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4369 r_cfg.fc_flags |= RTF_GATEWAY;
4370 }
19e42e45
RP
4371 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4372 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4373 if (nla)
4374 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4375 }
6b9ea5a6 4376
68e2ffde 4377 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
acb54e3c 4378 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
8c5b83f0
RP
4379 if (IS_ERR(rt)) {
4380 err = PTR_ERR(rt);
4381 rt = NULL;
6b9ea5a6 4382 goto cleanup;
8c5b83f0 4383 }
b5d2d75e
DA
4384 if (!rt6_qualify_for_ecmp(rt)) {
4385 err = -EINVAL;
4386 NL_SET_ERR_MSG(extack,
4387 "Device only routes can not be added for IPv6 using the multipath API.");
4388 fib6_info_release(rt);
4389 goto cleanup;
4390 }
6b9ea5a6 4391
5e670d84 4392 rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
398958ae 4393
d4ead6b3
DA
4394 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4395 rt, &r_cfg);
51ebd318 4396 if (err) {
93531c67 4397 fib6_info_release(rt);
6b9ea5a6
RP
4398 goto cleanup;
4399 }
4400
4401 rtnh = rtnh_next(rtnh, &remaining);
4402 }
4403
3b1137fe
DA
4404 /* for add and replace send one notification with all nexthops.
4405 * Skip the notification in fib6_add_rt2node and send one with
4406 * the full route when done
4407 */
4408 info->skip_notify = 1;
4409
6b9ea5a6
RP
4410 err_nh = NULL;
4411 list_for_each_entry(nh, &rt6_nh_list, next) {
8d1c802b
DA
4412 err = __ip6_ins_rt(nh->fib6_info, info, extack);
4413 fib6_info_release(nh->fib6_info);
93531c67 4414
f7225172
DA
4415 if (!err) {
4416 /* save reference to last route successfully inserted */
4417 rt_last = nh->fib6_info;
4418
4419 /* save reference to first route for notification */
4420 if (!rt_notif)
4421 rt_notif = nh->fib6_info;
4422 }
3b1137fe 4423
8d1c802b
DA
4424 /* nh->fib6_info is used or freed at this point, reset to NULL*/
4425 nh->fib6_info = NULL;
6b9ea5a6
RP
4426 if (err) {
4427 if (replace && nhn)
4428 ip6_print_replace_route_err(&rt6_nh_list);
4429 err_nh = nh;
4430 goto add_errout;
51ebd318 4431 }
6b9ea5a6 4432
1a72418b 4433 /* Because each route is added like a single route we remove
27596472
MK
4434 * these flags after the first nexthop: if there is a collision,
4435 * we have already failed to add the first nexthop:
4436 * fib6_add_rt2node() has rejected it; when replacing, old
4437 * nexthops have been replaced by first new, the rest should
4438 * be added to it.
1a72418b 4439 */
27596472
MK
4440 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4441 NLM_F_REPLACE);
6b9ea5a6
RP
4442 nhn++;
4443 }
4444
3b1137fe
DA
4445 /* success ... tell user about new route */
4446 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4447 goto cleanup;
4448
4449add_errout:
3b1137fe
DA
4450 /* send notification for routes that were added so that
4451 * the delete notifications sent by ip6_route_del are
4452 * coherent
4453 */
4454 if (rt_notif)
4455 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4456
6b9ea5a6
RP
4457 /* Delete routes that were already added */
4458 list_for_each_entry(nh, &rt6_nh_list, next) {
4459 if (err_nh == nh)
4460 break;
333c4301 4461 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4462 }
4463
4464cleanup:
4465 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
8d1c802b
DA
4466 if (nh->fib6_info)
4467 fib6_info_release(nh->fib6_info);
6b9ea5a6
RP
4468 list_del(&nh->next);
4469 kfree(nh);
4470 }
4471
4472 return err;
4473}
4474
333c4301
DA
4475static int ip6_route_multipath_del(struct fib6_config *cfg,
4476 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4477{
4478 struct fib6_config r_cfg;
4479 struct rtnexthop *rtnh;
4480 int remaining;
4481 int attrlen;
4482 int err = 1, last_err = 0;
4483
4484 remaining = cfg->fc_mp_len;
4485 rtnh = (struct rtnexthop *)cfg->fc_mp;
4486
4487 /* Parse a Multipath Entry */
4488 while (rtnh_ok(rtnh, remaining)) {
4489 memcpy(&r_cfg, cfg, sizeof(*cfg));
4490 if (rtnh->rtnh_ifindex)
4491 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4492
4493 attrlen = rtnh_attrlen(rtnh);
4494 if (attrlen > 0) {
4495 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4496
4497 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4498 if (nla) {
4499 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4500 r_cfg.fc_flags |= RTF_GATEWAY;
4501 }
4502 }
333c4301 4503 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4504 if (err)
4505 last_err = err;
4506
51ebd318
ND
4507 rtnh = rtnh_next(rtnh, &remaining);
4508 }
4509
4510 return last_err;
4511}
4512
c21ef3e3
DA
4513static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4514 struct netlink_ext_ack *extack)
1da177e4 4515{
86872cb5
TG
4516 struct fib6_config cfg;
4517 int err;
1da177e4 4518
333c4301 4519 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4520 if (err < 0)
4521 return err;
4522
51ebd318 4523 if (cfg.fc_mp)
333c4301 4524 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4525 else {
4526 cfg.fc_delete_all_nh = 1;
333c4301 4527 return ip6_route_del(&cfg, extack);
0ae81335 4528 }
1da177e4
LT
4529}
4530
c21ef3e3
DA
4531static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4532 struct netlink_ext_ack *extack)
1da177e4 4533{
86872cb5
TG
4534 struct fib6_config cfg;
4535 int err;
1da177e4 4536
333c4301 4537 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4538 if (err < 0)
4539 return err;
4540
51ebd318 4541 if (cfg.fc_mp)
333c4301 4542 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4543 else
acb54e3c 4544 return ip6_route_add(&cfg, GFP_KERNEL, extack);
1da177e4
LT
4545}
4546
8d1c802b 4547static size_t rt6_nlmsg_size(struct fib6_info *rt)
339bf98f 4548{
beb1afac
DA
4549 int nexthop_len = 0;
4550
93c2fb25 4551 if (rt->fib6_nsiblings) {
beb1afac
DA
4552 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4553 + NLA_ALIGN(sizeof(struct rtnexthop))
4554 + nla_total_size(16) /* RTA_GATEWAY */
5e670d84 4555 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
beb1afac 4556
93c2fb25 4557 nexthop_len *= rt->fib6_nsiblings;
beb1afac
DA
4558 }
4559
339bf98f
TG
4560 return NLMSG_ALIGN(sizeof(struct rtmsg))
4561 + nla_total_size(16) /* RTA_SRC */
4562 + nla_total_size(16) /* RTA_DST */
4563 + nla_total_size(16) /* RTA_GATEWAY */
4564 + nla_total_size(16) /* RTA_PREFSRC */
4565 + nla_total_size(4) /* RTA_TABLE */
4566 + nla_total_size(4) /* RTA_IIF */
4567 + nla_total_size(4) /* RTA_OIF */
4568 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4569 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4570 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4571 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4572 + nla_total_size(1) /* RTA_PREF */
5e670d84 4573 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
beb1afac
DA
4574 + nexthop_len;
4575}
4576
8d1c802b 4577static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
5be083ce 4578 unsigned int *flags, bool skip_oif)
beb1afac 4579{
5e670d84 4580 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
f9d882ea
IS
4581 *flags |= RTNH_F_DEAD;
4582
5e670d84 4583 if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
beb1afac 4584 *flags |= RTNH_F_LINKDOWN;
dcd1f572
DA
4585
4586 rcu_read_lock();
4587 if (fib6_ignore_linkdown(rt))
beb1afac 4588 *flags |= RTNH_F_DEAD;
dcd1f572 4589 rcu_read_unlock();
beb1afac
DA
4590 }
4591
93c2fb25 4592 if (rt->fib6_flags & RTF_GATEWAY) {
5e670d84 4593 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
beb1afac
DA
4594 goto nla_put_failure;
4595 }
4596
5e670d84
DA
4597 *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
4598 if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
4599 *flags |= RTNH_F_OFFLOAD;
4600
5be083ce 4601 /* not needed for multipath encoding b/c it has a rtnexthop struct */
5e670d84
DA
4602 if (!skip_oif && rt->fib6_nh.nh_dev &&
4603 nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
beb1afac
DA
4604 goto nla_put_failure;
4605
5e670d84
DA
4606 if (rt->fib6_nh.nh_lwtstate &&
4607 lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
beb1afac
DA
4608 goto nla_put_failure;
4609
4610 return 0;
4611
4612nla_put_failure:
4613 return -EMSGSIZE;
4614}
4615
5be083ce 4616/* add multipath next hop */
8d1c802b 4617static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
beb1afac 4618{
5e670d84 4619 const struct net_device *dev = rt->fib6_nh.nh_dev;
beb1afac
DA
4620 struct rtnexthop *rtnh;
4621 unsigned int flags = 0;
4622
4623 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4624 if (!rtnh)
4625 goto nla_put_failure;
4626
5e670d84
DA
4627 rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
4628 rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
beb1afac 4629
5be083ce 4630 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
4631 goto nla_put_failure;
4632
4633 rtnh->rtnh_flags = flags;
4634
4635 /* length of rtnetlink header + attributes */
4636 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4637
4638 return 0;
4639
4640nla_put_failure:
4641 return -EMSGSIZE;
339bf98f
TG
4642}
4643
d4ead6b3 4644static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 4645 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 4646 struct in6_addr *dest, struct in6_addr *src,
15e47304 4647 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4648 unsigned int flags)
1da177e4 4649{
22d0bd82
XL
4650 struct rt6_info *rt6 = (struct rt6_info *)dst;
4651 struct rt6key *rt6_dst, *rt6_src;
4652 u32 *pmetrics, table, rt6_flags;
2d7202bf 4653 struct nlmsghdr *nlh;
22d0bd82 4654 struct rtmsg *rtm;
d4ead6b3 4655 long expires = 0;
1da177e4 4656
15e47304 4657 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4658 if (!nlh)
26932566 4659 return -EMSGSIZE;
2d7202bf 4660
22d0bd82
XL
4661 if (rt6) {
4662 rt6_dst = &rt6->rt6i_dst;
4663 rt6_src = &rt6->rt6i_src;
4664 rt6_flags = rt6->rt6i_flags;
4665 } else {
4666 rt6_dst = &rt->fib6_dst;
4667 rt6_src = &rt->fib6_src;
4668 rt6_flags = rt->fib6_flags;
4669 }
4670
2d7202bf 4671 rtm = nlmsg_data(nlh);
1da177e4 4672 rtm->rtm_family = AF_INET6;
22d0bd82
XL
4673 rtm->rtm_dst_len = rt6_dst->plen;
4674 rtm->rtm_src_len = rt6_src->plen;
1da177e4 4675 rtm->rtm_tos = 0;
93c2fb25
DA
4676 if (rt->fib6_table)
4677 table = rt->fib6_table->tb6_id;
c71099ac 4678 else
9e762a4a
PM
4679 table = RT6_TABLE_UNSPEC;
4680 rtm->rtm_table = table;
c78679e8
DM
4681 if (nla_put_u32(skb, RTA_TABLE, table))
4682 goto nla_put_failure;
e8478e80
DA
4683
4684 rtm->rtm_type = rt->fib6_type;
1da177e4
LT
4685 rtm->rtm_flags = 0;
4686 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
93c2fb25 4687 rtm->rtm_protocol = rt->fib6_protocol;
1da177e4 4688
22d0bd82 4689 if (rt6_flags & RTF_CACHE)
1da177e4
LT
4690 rtm->rtm_flags |= RTM_F_CLONED;
4691
d4ead6b3
DA
4692 if (dest) {
4693 if (nla_put_in6_addr(skb, RTA_DST, dest))
c78679e8 4694 goto nla_put_failure;
1ab1457c 4695 rtm->rtm_dst_len = 128;
1da177e4 4696 } else if (rtm->rtm_dst_len)
22d0bd82 4697 if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
c78679e8 4698 goto nla_put_failure;
1da177e4
LT
4699#ifdef CONFIG_IPV6_SUBTREES
4700 if (src) {
930345ea 4701 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4702 goto nla_put_failure;
1ab1457c 4703 rtm->rtm_src_len = 128;
c78679e8 4704 } else if (rtm->rtm_src_len &&
22d0bd82 4705 nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
c78679e8 4706 goto nla_put_failure;
1da177e4 4707#endif
7bc570c8
YH
4708 if (iif) {
4709#ifdef CONFIG_IPV6_MROUTE
22d0bd82 4710 if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
fd61c6ba
DA
4711 int err = ip6mr_get_route(net, skb, rtm, portid);
4712
4713 if (err == 0)
4714 return 0;
4715 if (err < 0)
4716 goto nla_put_failure;
7bc570c8
YH
4717 } else
4718#endif
c78679e8
DM
4719 if (nla_put_u32(skb, RTA_IIF, iif))
4720 goto nla_put_failure;
d4ead6b3 4721 } else if (dest) {
1da177e4 4722 struct in6_addr saddr_buf;
d4ead6b3 4723 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
930345ea 4724 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4725 goto nla_put_failure;
1da177e4 4726 }
2d7202bf 4727
93c2fb25 4728 if (rt->fib6_prefsrc.plen) {
c3968a85 4729 struct in6_addr saddr_buf;
93c2fb25 4730 saddr_buf = rt->fib6_prefsrc.addr;
930345ea 4731 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4732 goto nla_put_failure;
c3968a85
DW
4733 }
4734
d4ead6b3
DA
4735 pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4736 if (rtnetlink_put_metrics(skb, pmetrics) < 0)
2d7202bf
TG
4737 goto nla_put_failure;
4738
93c2fb25 4739 if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
c78679e8 4740 goto nla_put_failure;
8253947e 4741
beb1afac
DA
4742 /* For multipath routes, walk the siblings list and add
4743 * each as a nexthop within RTA_MULTIPATH.
4744 */
22d0bd82
XL
4745 if (rt6) {
4746 if (rt6_flags & RTF_GATEWAY &&
4747 nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
4748 goto nla_put_failure;
4749
4750 if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
4751 goto nla_put_failure;
4752 } else if (rt->fib6_nsiblings) {
8d1c802b 4753 struct fib6_info *sibling, *next_sibling;
beb1afac
DA
4754 struct nlattr *mp;
4755
4756 mp = nla_nest_start(skb, RTA_MULTIPATH);
4757 if (!mp)
4758 goto nla_put_failure;
4759
4760 if (rt6_add_nexthop(skb, rt) < 0)
4761 goto nla_put_failure;
4762
4763 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25 4764 &rt->fib6_siblings, fib6_siblings) {
beb1afac
DA
4765 if (rt6_add_nexthop(skb, sibling) < 0)
4766 goto nla_put_failure;
4767 }
4768
4769 nla_nest_end(skb, mp);
4770 } else {
5be083ce 4771 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
4772 goto nla_put_failure;
4773 }
4774
22d0bd82 4775 if (rt6_flags & RTF_EXPIRES) {
14895687
DA
4776 expires = dst ? dst->expires : rt->expires;
4777 expires -= jiffies;
4778 }
69cdf8f9 4779
d4ead6b3 4780 if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
e3703b3d 4781 goto nla_put_failure;
2d7202bf 4782
22d0bd82 4783 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
c78ba6d6
LR
4784 goto nla_put_failure;
4785
19e42e45 4786
053c095a
JB
4787 nlmsg_end(skb, nlh);
4788 return 0;
2d7202bf
TG
4789
4790nla_put_failure:
26932566
PM
4791 nlmsg_cancel(skb, nlh);
4792 return -EMSGSIZE;
1da177e4
LT
4793}
4794
8d1c802b 4795int rt6_dump_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4796{
4797 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1f17e2f2
DA
4798 struct net *net = arg->net;
4799
421842ed 4800 if (rt == net->ipv6.fib6_null_entry)
1f17e2f2 4801 return 0;
1da177e4 4802
2d7202bf
TG
4803 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4804 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
f8cfe2ce
DA
4805
4806 /* user wants prefix routes only */
4807 if (rtm->rtm_flags & RTM_F_PREFIX &&
93c2fb25 4808 !(rt->fib6_flags & RTF_PREFIX_RT)) {
f8cfe2ce
DA
4809 /* success since this is not a prefix route */
4810 return 1;
4811 }
4812 }
1da177e4 4813
d4ead6b3
DA
4814 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4815 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
4816 arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
1da177e4
LT
4817}
4818
c21ef3e3
DA
4819static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4820 struct netlink_ext_ack *extack)
1da177e4 4821{
3b1e0a65 4822 struct net *net = sock_net(in_skb->sk);
ab364a6f 4823 struct nlattr *tb[RTA_MAX+1];
18c3a61c 4824 int err, iif = 0, oif = 0;
a68886a6 4825 struct fib6_info *from;
18c3a61c 4826 struct dst_entry *dst;
ab364a6f 4827 struct rt6_info *rt;
1da177e4 4828 struct sk_buff *skb;
ab364a6f 4829 struct rtmsg *rtm;
4c9483b2 4830 struct flowi6 fl6;
18c3a61c 4831 bool fibmatch;
1da177e4 4832
fceb6435 4833 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 4834 extack);
ab364a6f
TG
4835 if (err < 0)
4836 goto errout;
1da177e4 4837
ab364a6f 4838 err = -EINVAL;
4c9483b2 4839 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
4840 rtm = nlmsg_data(nlh);
4841 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4842 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4843
ab364a6f
TG
4844 if (tb[RTA_SRC]) {
4845 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4846 goto errout;
4847
4e3fd7a0 4848 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4849 }
4850
4851 if (tb[RTA_DST]) {
4852 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4853 goto errout;
4854
4e3fd7a0 4855 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4856 }
4857
4858 if (tb[RTA_IIF])
4859 iif = nla_get_u32(tb[RTA_IIF]);
4860
4861 if (tb[RTA_OIF])
72331bc0 4862 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4863
2e47b291
LC
4864 if (tb[RTA_MARK])
4865 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4866
622ec2c9
LC
4867 if (tb[RTA_UID])
4868 fl6.flowi6_uid = make_kuid(current_user_ns(),
4869 nla_get_u32(tb[RTA_UID]));
4870 else
4871 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4872
eacb9384
RP
4873 if (tb[RTA_SPORT])
4874 fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
4875
4876 if (tb[RTA_DPORT])
4877 fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
4878
4879 if (tb[RTA_IP_PROTO]) {
4880 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
4881 &fl6.flowi6_proto, extack);
4882 if (err)
4883 goto errout;
4884 }
4885
1da177e4
LT
4886 if (iif) {
4887 struct net_device *dev;
72331bc0
SL
4888 int flags = 0;
4889
121622db
FW
4890 rcu_read_lock();
4891
4892 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4893 if (!dev) {
121622db 4894 rcu_read_unlock();
1da177e4 4895 err = -ENODEV;
ab364a6f 4896 goto errout;
1da177e4 4897 }
72331bc0
SL
4898
4899 fl6.flowi6_iif = iif;
4900
4901 if (!ipv6_addr_any(&fl6.saddr))
4902 flags |= RT6_LOOKUP_F_HAS_SADDR;
4903
b75cc8f9 4904 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
121622db
FW
4905
4906 rcu_read_unlock();
72331bc0
SL
4907 } else {
4908 fl6.flowi6_oif = oif;
4909
58acfd71 4910 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
4911 }
4912
18c3a61c
RP
4913
4914 rt = container_of(dst, struct rt6_info, dst);
4915 if (rt->dst.error) {
4916 err = rt->dst.error;
4917 ip6_rt_put(rt);
4918 goto errout;
1da177e4
LT
4919 }
4920
9d6acb3b
WC
4921 if (rt == net->ipv6.ip6_null_entry) {
4922 err = rt->dst.error;
4923 ip6_rt_put(rt);
4924 goto errout;
4925 }
4926
ab364a6f 4927 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4928 if (!skb) {
94e187c0 4929 ip6_rt_put(rt);
ab364a6f
TG
4930 err = -ENOBUFS;
4931 goto errout;
4932 }
1da177e4 4933
d8d1f30b 4934 skb_dst_set(skb, &rt->dst);
a68886a6
DA
4935
4936 rcu_read_lock();
4937 from = rcu_dereference(rt->from);
4938
18c3a61c 4939 if (fibmatch)
a68886a6 4940 err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
18c3a61c
RP
4941 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4942 nlh->nlmsg_seq, 0);
4943 else
a68886a6
DA
4944 err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
4945 &fl6.saddr, iif, RTM_NEWROUTE,
d4ead6b3
DA
4946 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
4947 0);
a68886a6
DA
4948 rcu_read_unlock();
4949
1da177e4 4950 if (err < 0) {
ab364a6f
TG
4951 kfree_skb(skb);
4952 goto errout;
1da177e4
LT
4953 }
4954
15e47304 4955 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 4956errout:
1da177e4 4957 return err;
1da177e4
LT
4958}
4959
8d1c802b 4960void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
37a1d361 4961 unsigned int nlm_flags)
1da177e4
LT
4962{
4963 struct sk_buff *skb;
5578689a 4964 struct net *net = info->nl_net;
528c4ceb
DL
4965 u32 seq;
4966 int err;
4967
4968 err = -ENOBUFS;
38308473 4969 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 4970
19e42e45 4971 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 4972 if (!skb)
21713ebc
TG
4973 goto errout;
4974
d4ead6b3
DA
4975 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
4976 event, info->portid, seq, nlm_flags);
26932566
PM
4977 if (err < 0) {
4978 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4979 WARN_ON(err == -EMSGSIZE);
4980 kfree_skb(skb);
4981 goto errout;
4982 }
15e47304 4983 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
4984 info->nlh, gfp_any());
4985 return;
21713ebc
TG
4986errout:
4987 if (err < 0)
5578689a 4988 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
4989}
4990
8ed67789 4991static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 4992 unsigned long event, void *ptr)
8ed67789 4993{
351638e7 4994 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 4995 struct net *net = dev_net(dev);
8ed67789 4996
242d3a49
WC
4997 if (!(dev->flags & IFF_LOOPBACK))
4998 return NOTIFY_OK;
4999
5000 if (event == NETDEV_REGISTER) {
421842ed 5001 net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
d8d1f30b 5002 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
5003 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
5004#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 5005 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 5006 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 5007 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 5008 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 5009#endif
76da0704
WC
5010 } else if (event == NETDEV_UNREGISTER &&
5011 dev->reg_state != NETREG_UNREGISTERED) {
5012 /* NETDEV_UNREGISTER could be fired for multiple times by
5013 * netdev_wait_allrefs(). Make sure we only call this once.
5014 */
12d94a80 5015 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 5016#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
5017 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
5018 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
5019#endif
5020 }
5021
5022 return NOTIFY_OK;
5023}
5024
1da177e4
LT
5025/*
5026 * /proc
5027 */
5028
5029#ifdef CONFIG_PROC_FS
1da177e4
LT
5030static int rt6_stats_seq_show(struct seq_file *seq, void *v)
5031{
69ddb805 5032 struct net *net = (struct net *)seq->private;
1da177e4 5033 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
5034 net->ipv6.rt6_stats->fib_nodes,
5035 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 5036 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
5037 net->ipv6.rt6_stats->fib_rt_entries,
5038 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 5039 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 5040 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
5041
5042 return 0;
5043}
1da177e4
LT
5044#endif /* CONFIG_PROC_FS */
5045
5046#ifdef CONFIG_SYSCTL
5047
1da177e4 5048static
fe2c6338 5049int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
5050 void __user *buffer, size_t *lenp, loff_t *ppos)
5051{
c486da34
LAG
5052 struct net *net;
5053 int delay;
5054 if (!write)
1da177e4 5055 return -EINVAL;
c486da34
LAG
5056
5057 net = (struct net *)ctl->extra1;
5058 delay = net->ipv6.sysctl.flush_delay;
5059 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 5060 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 5061 return 0;
1da177e4
LT
5062}
5063
fe2c6338 5064struct ctl_table ipv6_route_table_template[] = {
1ab1457c 5065 {
1da177e4 5066 .procname = "flush",
4990509f 5067 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 5068 .maxlen = sizeof(int),
89c8b3a1 5069 .mode = 0200,
6d9f239a 5070 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
5071 },
5072 {
1da177e4 5073 .procname = "gc_thresh",
9a7ec3a9 5074 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
5075 .maxlen = sizeof(int),
5076 .mode = 0644,
6d9f239a 5077 .proc_handler = proc_dointvec,
1da177e4
LT
5078 },
5079 {
1da177e4 5080 .procname = "max_size",
4990509f 5081 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
5082 .maxlen = sizeof(int),
5083 .mode = 0644,
6d9f239a 5084 .proc_handler = proc_dointvec,
1da177e4
LT
5085 },
5086 {
1da177e4 5087 .procname = "gc_min_interval",
4990509f 5088 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5089 .maxlen = sizeof(int),
5090 .mode = 0644,
6d9f239a 5091 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5092 },
5093 {
1da177e4 5094 .procname = "gc_timeout",
4990509f 5095 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
5096 .maxlen = sizeof(int),
5097 .mode = 0644,
6d9f239a 5098 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5099 },
5100 {
1da177e4 5101 .procname = "gc_interval",
4990509f 5102 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
5103 .maxlen = sizeof(int),
5104 .mode = 0644,
6d9f239a 5105 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5106 },
5107 {
1da177e4 5108 .procname = "gc_elasticity",
4990509f 5109 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
5110 .maxlen = sizeof(int),
5111 .mode = 0644,
f3d3f616 5112 .proc_handler = proc_dointvec,
1da177e4
LT
5113 },
5114 {
1da177e4 5115 .procname = "mtu_expires",
4990509f 5116 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
5117 .maxlen = sizeof(int),
5118 .mode = 0644,
6d9f239a 5119 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5120 },
5121 {
1da177e4 5122 .procname = "min_adv_mss",
4990509f 5123 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
5124 .maxlen = sizeof(int),
5125 .mode = 0644,
f3d3f616 5126 .proc_handler = proc_dointvec,
1da177e4
LT
5127 },
5128 {
1da177e4 5129 .procname = "gc_min_interval_ms",
4990509f 5130 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5131 .maxlen = sizeof(int),
5132 .mode = 0644,
6d9f239a 5133 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 5134 },
f8572d8f 5135 { }
1da177e4
LT
5136};
5137
2c8c1e72 5138struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
5139{
5140 struct ctl_table *table;
5141
5142 table = kmemdup(ipv6_route_table_template,
5143 sizeof(ipv6_route_table_template),
5144 GFP_KERNEL);
5ee09105
YH
5145
5146 if (table) {
5147 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 5148 table[0].extra1 = net;
86393e52 5149 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
5150 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5151 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5152 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5153 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5154 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5155 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5156 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 5157 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
5158
5159 /* Don't export sysctls to unprivileged users */
5160 if (net->user_ns != &init_user_ns)
5161 table[0].procname = NULL;
5ee09105
YH
5162 }
5163
760f2d01
DL
5164 return table;
5165}
1da177e4
LT
5166#endif
5167
2c8c1e72 5168static int __net_init ip6_route_net_init(struct net *net)
cdb18761 5169{
633d424b 5170 int ret = -ENOMEM;
8ed67789 5171
86393e52
AD
5172 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5173 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 5174
fc66f95c
ED
5175 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5176 goto out_ip6_dst_ops;
5177
421842ed
DA
5178 net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5179 sizeof(*net->ipv6.fib6_null_entry),
5180 GFP_KERNEL);
5181 if (!net->ipv6.fib6_null_entry)
5182 goto out_ip6_dst_entries;
5183
8ed67789
DL
5184 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5185 sizeof(*net->ipv6.ip6_null_entry),
5186 GFP_KERNEL);
5187 if (!net->ipv6.ip6_null_entry)
421842ed 5188 goto out_fib6_null_entry;
d8d1f30b 5189 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5190 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5191 ip6_template_metrics, true);
8ed67789
DL
5192
5193#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 5194 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
5195 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5196 sizeof(*net->ipv6.ip6_prohibit_entry),
5197 GFP_KERNEL);
68fffc67
PZ
5198 if (!net->ipv6.ip6_prohibit_entry)
5199 goto out_ip6_null_entry;
d8d1f30b 5200 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5201 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5202 ip6_template_metrics, true);
8ed67789
DL
5203
5204 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5205 sizeof(*net->ipv6.ip6_blk_hole_entry),
5206 GFP_KERNEL);
68fffc67
PZ
5207 if (!net->ipv6.ip6_blk_hole_entry)
5208 goto out_ip6_prohibit_entry;
d8d1f30b 5209 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5210 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5211 ip6_template_metrics, true);
8ed67789
DL
5212#endif
5213
b339a47c
PZ
5214 net->ipv6.sysctl.flush_delay = 0;
5215 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5216 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5217 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5218 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5219 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5220 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5221 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5222
6891a346
BT
5223 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5224
8ed67789
DL
5225 ret = 0;
5226out:
5227 return ret;
f2fc6a54 5228
68fffc67
PZ
5229#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5230out_ip6_prohibit_entry:
5231 kfree(net->ipv6.ip6_prohibit_entry);
5232out_ip6_null_entry:
5233 kfree(net->ipv6.ip6_null_entry);
5234#endif
421842ed
DA
5235out_fib6_null_entry:
5236 kfree(net->ipv6.fib6_null_entry);
fc66f95c
ED
5237out_ip6_dst_entries:
5238 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 5239out_ip6_dst_ops:
f2fc6a54 5240 goto out;
cdb18761
DL
5241}
5242
2c8c1e72 5243static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 5244{
421842ed 5245 kfree(net->ipv6.fib6_null_entry);
8ed67789
DL
5246 kfree(net->ipv6.ip6_null_entry);
5247#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5248 kfree(net->ipv6.ip6_prohibit_entry);
5249 kfree(net->ipv6.ip6_blk_hole_entry);
5250#endif
41bb78b4 5251 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
5252}
5253
d189634e
TG
5254static int __net_init ip6_route_net_init_late(struct net *net)
5255{
5256#ifdef CONFIG_PROC_FS
c3506372
CH
5257 proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
5258 sizeof(struct ipv6_route_iter));
3617d949
CH
5259 proc_create_net_single("rt6_stats", 0444, net->proc_net,
5260 rt6_stats_seq_show, NULL);
d189634e
TG
5261#endif
5262 return 0;
5263}
5264
5265static void __net_exit ip6_route_net_exit_late(struct net *net)
5266{
5267#ifdef CONFIG_PROC_FS
ece31ffd
G
5268 remove_proc_entry("ipv6_route", net->proc_net);
5269 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
5270#endif
5271}
5272
cdb18761
DL
5273static struct pernet_operations ip6_route_net_ops = {
5274 .init = ip6_route_net_init,
5275 .exit = ip6_route_net_exit,
5276};
5277
c3426b47
DM
5278static int __net_init ipv6_inetpeer_init(struct net *net)
5279{
5280 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5281
5282 if (!bp)
5283 return -ENOMEM;
5284 inet_peer_base_init(bp);
5285 net->ipv6.peers = bp;
5286 return 0;
5287}
5288
5289static void __net_exit ipv6_inetpeer_exit(struct net *net)
5290{
5291 struct inet_peer_base *bp = net->ipv6.peers;
5292
5293 net->ipv6.peers = NULL;
56a6b248 5294 inetpeer_invalidate_tree(bp);
c3426b47
DM
5295 kfree(bp);
5296}
5297
2b823f72 5298static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
5299 .init = ipv6_inetpeer_init,
5300 .exit = ipv6_inetpeer_exit,
5301};
5302
d189634e
TG
5303static struct pernet_operations ip6_route_net_late_ops = {
5304 .init = ip6_route_net_init_late,
5305 .exit = ip6_route_net_exit_late,
5306};
5307
8ed67789
DL
5308static struct notifier_block ip6_route_dev_notifier = {
5309 .notifier_call = ip6_route_dev_notify,
242d3a49 5310 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5311};
5312
2f460933
WC
5313void __init ip6_route_init_special_entries(void)
5314{
5315 /* Registering of the loopback is done before this portion of code,
5316 * the loopback reference in rt6_info will not be taken, do it
5317 * manually for init_net */
421842ed 5318 init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
2f460933
WC
5319 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5320 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5321 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5322 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5323 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5324 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5325 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5326 #endif
5327}
5328
433d49c3 5329int __init ip6_route_init(void)
1da177e4 5330{
433d49c3 5331 int ret;
8d0b94af 5332 int cpu;
433d49c3 5333
9a7ec3a9
DL
5334 ret = -ENOMEM;
5335 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5336 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5337 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5338 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5339 goto out;
14e50e57 5340
fc66f95c 5341 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5342 if (ret)
bdb3289f 5343 goto out_kmem_cache;
bdb3289f 5344
c3426b47
DM
5345 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5346 if (ret)
e8803b6c 5347 goto out_dst_entries;
2a0c451a 5348
7e52b33b
DM
5349 ret = register_pernet_subsys(&ip6_route_net_ops);
5350 if (ret)
5351 goto out_register_inetpeer;
c3426b47 5352
5dc121e9
AE
5353 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5354
e8803b6c 5355 ret = fib6_init();
433d49c3 5356 if (ret)
8ed67789 5357 goto out_register_subsys;
433d49c3 5358
433d49c3
DL
5359 ret = xfrm6_init();
5360 if (ret)
e8803b6c 5361 goto out_fib6_init;
c35b7e72 5362
433d49c3
DL
5363 ret = fib6_rules_init();
5364 if (ret)
5365 goto xfrm6_init;
7e5449c2 5366
d189634e
TG
5367 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5368 if (ret)
5369 goto fib6_rules_init;
5370
16feebcf
FW
5371 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5372 inet6_rtm_newroute, NULL, 0);
5373 if (ret < 0)
5374 goto out_register_late_subsys;
5375
5376 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5377 inet6_rtm_delroute, NULL, 0);
5378 if (ret < 0)
5379 goto out_register_late_subsys;
5380
5381 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5382 inet6_rtm_getroute, NULL,
5383 RTNL_FLAG_DOIT_UNLOCKED);
5384 if (ret < 0)
d189634e 5385 goto out_register_late_subsys;
c127ea2c 5386
8ed67789 5387 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5388 if (ret)
d189634e 5389 goto out_register_late_subsys;
8ed67789 5390
8d0b94af
MKL
5391 for_each_possible_cpu(cpu) {
5392 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5393
5394 INIT_LIST_HEAD(&ul->head);
5395 spin_lock_init(&ul->lock);
5396 }
5397
433d49c3
DL
5398out:
5399 return ret;
5400
d189634e 5401out_register_late_subsys:
16feebcf 5402 rtnl_unregister_all(PF_INET6);
d189634e 5403 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5404fib6_rules_init:
433d49c3
DL
5405 fib6_rules_cleanup();
5406xfrm6_init:
433d49c3 5407 xfrm6_fini();
2a0c451a
TG
5408out_fib6_init:
5409 fib6_gc_cleanup();
8ed67789
DL
5410out_register_subsys:
5411 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5412out_register_inetpeer:
5413 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5414out_dst_entries:
5415 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5416out_kmem_cache:
f2fc6a54 5417 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5418 goto out;
1da177e4
LT
5419}
5420
5421void ip6_route_cleanup(void)
5422{
8ed67789 5423 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5424 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5425 fib6_rules_cleanup();
1da177e4 5426 xfrm6_fini();
1da177e4 5427 fib6_gc_cleanup();
c3426b47 5428 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5429 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5430 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5431 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5432}