]> git.ipfire.org Git - thirdparty/kernel/stable.git/blame - net/ipv6/route.c
bpf, doc: clarification for the meaning of 'id'
[thirdparty/kernel/stable.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
b811580d 66#include <trace/events/fib6.h>
1da177e4 67
7c0f6ba6 68#include <linux/uaccess.h>
1da177e4
LT
69
70#ifdef CONFIG_SYSCTL
71#include <linux/sysctl.h>
72#endif
73
/* Result codes produced while scoring a route's next hop.  Every failure
 * value is negative so that callers can test "score < 0".
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,	/* find_match() skips the route */
	RT6_NUD_FAIL_PROBE	= -2,	/* still compared against the best score */
	RT6_NUD_FAIL_DO_RR	= -1,	/* find_match() requests round-robin */
	RT6_NUD_SUCCEED		= 1
};
80
1da177e4 81static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 82static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 83static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
84static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85static void ip6_dst_destroy(struct dst_entry *);
86static void ip6_dst_ifdown(struct dst_entry *,
87 struct net_device *dev, int how);
569d3645 88static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
89
90static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 91static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 92static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 93static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 94static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
95static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 struct sk_buff *skb, u32 mtu);
97static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 struct sk_buff *skb);
8d1c802b
DA
99static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
100static size_t rt6_nlmsg_size(struct fib6_info *rt);
d4ead6b3 101static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 102 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 103 struct in6_addr *dest, struct in6_addr *src,
16a16cd3
DA
104 int iif, int type, u32 portid, u32 seq,
105 unsigned int flags);
8d1c802b 106static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
107 struct in6_addr *daddr,
108 struct in6_addr *saddr);
1da177e4 109
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Helpers used by rt6_route_rcv(); defined later in this file. */
static struct fib6_info *rt6_add_route_info(struct net *net,
					    const struct in6_addr *prefix,
					    int prefixlen,
					    const struct in6_addr *gwaddr,
					    struct net_device *dev,
					    unsigned int pref);
static struct fib6_info *rt6_get_route_info(struct net *net,
					    const struct in6_addr *prefix,
					    int prefixlen,
					    const struct in6_addr *gwaddr,
					    struct net_device *dev);
#endif
121
8d0b94af
MKL
122struct uncached_list {
123 spinlock_t lock;
124 struct list_head head;
125};
126
127static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
128
510c321b 129void rt6_uncached_list_add(struct rt6_info *rt)
8d0b94af
MKL
130{
131 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
132
8d0b94af
MKL
133 rt->rt6i_uncached_list = ul;
134
135 spin_lock_bh(&ul->lock);
136 list_add_tail(&rt->rt6i_uncached, &ul->head);
137 spin_unlock_bh(&ul->lock);
138}
139
510c321b 140void rt6_uncached_list_del(struct rt6_info *rt)
8d0b94af
MKL
141{
142 if (!list_empty(&rt->rt6i_uncached)) {
143 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 144 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
145
146 spin_lock_bh(&ul->lock);
147 list_del(&rt->rt6i_uncached);
81eb8447 148 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
149 spin_unlock_bh(&ul->lock);
150 }
151}
152
153static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
154{
155 struct net_device *loopback_dev = net->loopback_dev;
156 int cpu;
157
e332bc67
EB
158 if (dev == loopback_dev)
159 return;
160
8d0b94af
MKL
161 for_each_possible_cpu(cpu) {
162 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
163 struct rt6_info *rt;
164
165 spin_lock_bh(&ul->lock);
166 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
167 struct inet6_dev *rt_idev = rt->rt6i_idev;
168 struct net_device *rt_dev = rt->dst.dev;
169
e332bc67 170 if (rt_idev->dev == dev) {
8d0b94af
MKL
171 rt->rt6i_idev = in6_dev_get(loopback_dev);
172 in6_dev_put(rt_idev);
173 }
174
e332bc67 175 if (rt_dev == dev) {
8d0b94af
MKL
176 rt->dst.dev = loopback_dev;
177 dev_hold(rt->dst.dev);
178 dev_put(rt_dev);
179 }
180 }
181 spin_unlock_bh(&ul->lock);
182 }
183}
184
f8a1b43b 185static inline const void *choose_neigh_daddr(const struct in6_addr *p,
f894cbf8
DM
186 struct sk_buff *skb,
187 const void *daddr)
39232973 188{
a7563f34 189 if (!ipv6_addr_any(p))
39232973 190 return (const void *) p;
f894cbf8
DM
191 else if (skb)
192 return &ipv6_hdr(skb)->daddr;
39232973
DM
193 return daddr;
194}
195
f8a1b43b
DA
196struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
197 struct net_device *dev,
198 struct sk_buff *skb,
199 const void *daddr)
d3aaeb38 200{
39232973
DM
201 struct neighbour *n;
202
f8a1b43b
DA
203 daddr = choose_neigh_daddr(gw, skb, daddr);
204 n = __ipv6_neigh_lookup(dev, daddr);
f83c7790
DM
205 if (n)
206 return n;
f8a1b43b
DA
207 return neigh_create(&nd_tbl, daddr, dev);
208}
209
210static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
211 struct sk_buff *skb,
212 const void *daddr)
213{
214 const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
215
216 return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
f83c7790
DM
217}
218
63fca65d
JA
219static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
220{
221 struct net_device *dev = dst->dev;
222 struct rt6_info *rt = (struct rt6_info *)dst;
223
f8a1b43b 224 daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
63fca65d
JA
225 if (!daddr)
226 return;
227 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
228 return;
229 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
230 return;
231 __ipv6_confirm_neigh(dev, daddr);
232}
233
9a7ec3a9 234static struct dst_ops ip6_dst_ops_template = {
1da177e4 235 .family = AF_INET6,
1da177e4
LT
236 .gc = ip6_dst_gc,
237 .gc_thresh = 1024,
238 .check = ip6_dst_check,
0dbaee3b 239 .default_advmss = ip6_default_advmss,
ebb762f2 240 .mtu = ip6_mtu,
d4ead6b3 241 .cow_metrics = dst_cow_metrics_generic,
1da177e4
LT
242 .destroy = ip6_dst_destroy,
243 .ifdown = ip6_dst_ifdown,
244 .negative_advice = ip6_negative_advice,
245 .link_failure = ip6_link_failure,
246 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 247 .redirect = rt6_do_redirect,
9f8955cc 248 .local_out = __ip6_local_out,
f8a1b43b 249 .neigh_lookup = ip6_dst_neigh_lookup,
63fca65d 250 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
251};
252
ebb762f2 253static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 254{
618f9bc7
SK
255 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
256
257 return mtu ? : dst->dev->mtu;
ec831ea7
RD
258}
259
6700c270
DM
260static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
261 struct sk_buff *skb, u32 mtu)
14e50e57
DM
262{
263}
264
6700c270
DM
/* Blackhole dsts ignore redirects: intentionally empty. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
}
269
14e50e57
DM
270static struct dst_ops ip6_dst_blackhole_ops = {
271 .family = AF_INET6,
14e50e57
DM
272 .destroy = ip6_dst_destroy,
273 .check = ip6_dst_check,
ebb762f2 274 .mtu = ip6_blackhole_mtu,
214f45c9 275 .default_advmss = ip6_default_advmss,
14e50e57 276 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 277 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 278 .cow_metrics = dst_cow_metrics_generic,
f8a1b43b 279 .neigh_lookup = ip6_dst_neigh_lookup,
14e50e57
DM
280};
281
62fa8a84 282static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 283 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
284};
285
8d1c802b 286static const struct fib6_info fib6_null_entry_template = {
93c2fb25
DA
287 .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP),
288 .fib6_protocol = RTPROT_KERNEL,
289 .fib6_metric = ~(u32)0,
290 .fib6_ref = ATOMIC_INIT(1),
421842ed
DA
291 .fib6_type = RTN_UNREACHABLE,
292 .fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
293};
294
fb0af4c7 295static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
296 .dst = {
297 .__refcnt = ATOMIC_INIT(1),
298 .__use = 1,
2c20cbd7 299 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 300 .error = -ENETUNREACH,
d8d1f30b
CG
301 .input = ip6_pkt_discard,
302 .output = ip6_pkt_discard_out,
1da177e4
LT
303 },
304 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
1da177e4
LT
305};
306
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Template for the "prohibit" route: dst error is -EACCES. */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};

/* Template for the "blackhole" route: dst error is -EINVAL and packets go
 * to the generic dst discard handlers.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};

#endif
334
ebfa45f0
MKL
335static void rt6_info_init(struct rt6_info *rt)
336{
337 struct dst_entry *dst = &rt->dst;
338
339 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
ebfa45f0
MKL
340 INIT_LIST_HEAD(&rt->rt6i_uncached);
341}
342
1da177e4 343/* allocate dst with ip6_dst_ops */
93531c67
DA
344struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
345 int flags)
1da177e4 346{
97bab73f 347 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 348 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 349
81eb8447 350 if (rt) {
ebfa45f0 351 rt6_info_init(rt);
81eb8447
WW
352 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
353 }
8104891b 354
cf911662 355 return rt;
1da177e4 356}
9ab179d8 357EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 358
1da177e4
LT
359static void ip6_dst_destroy(struct dst_entry *dst)
360{
361 struct rt6_info *rt = (struct rt6_info *)dst;
a68886a6 362 struct fib6_info *from;
8d0b94af 363 struct inet6_dev *idev;
1da177e4 364
4b32b5ad 365 dst_destroy_metrics_generic(dst);
8d0b94af
MKL
366 rt6_uncached_list_del(rt);
367
368 idev = rt->rt6i_idev;
38308473 369 if (idev) {
1da177e4
LT
370 rt->rt6i_idev = NULL;
371 in6_dev_put(idev);
1ab1457c 372 }
d4ead6b3 373
a68886a6
DA
374 rcu_read_lock();
375 from = rcu_dereference(rt->from);
376 rcu_assign_pointer(rt->from, NULL);
93531c67 377 fib6_info_release(from);
a68886a6 378 rcu_read_unlock();
b3419363
DM
379}
380
1da177e4
LT
381static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
382 int how)
383{
384 struct rt6_info *rt = (struct rt6_info *)dst;
385 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 386 struct net_device *loopback_dev =
c346dca1 387 dev_net(dev)->loopback_dev;
1da177e4 388
e5645f51
WW
389 if (idev && idev->dev != loopback_dev) {
390 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
391 if (loopback_idev) {
392 rt->rt6i_idev = loopback_idev;
393 in6_dev_put(idev);
97cac082 394 }
1da177e4
LT
395 }
396}
397
5973fb1e
MKL
398static bool __rt6_check_expired(const struct rt6_info *rt)
399{
400 if (rt->rt6i_flags & RTF_EXPIRES)
401 return time_after(jiffies, rt->dst.expires);
402 else
403 return false;
404}
405
a50feda5 406static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 407{
a68886a6
DA
408 struct fib6_info *from;
409
410 from = rcu_dereference(rt->from);
411
1716a961
G
412 if (rt->rt6i_flags & RTF_EXPIRES) {
413 if (time_after(jiffies, rt->dst.expires))
a50feda5 414 return true;
a68886a6 415 } else if (from) {
1e2ea8ad 416 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
a68886a6 417 fib6_check_expired(from);
1716a961 418 }
a50feda5 419 return false;
1da177e4
LT
420}
421
8d1c802b
DA
422static struct fib6_info *rt6_multipath_select(const struct net *net,
423 struct fib6_info *match,
52bd4c0c 424 struct flowi6 *fl6, int oif,
b75cc8f9 425 const struct sk_buff *skb,
52bd4c0c 426 int strict)
51ebd318 427{
8d1c802b 428 struct fib6_info *sibling, *next_sibling;
51ebd318 429
b673d6cc
JS
430 /* We might have already computed the hash for ICMPv6 errors. In such
431 * case it will always be non-zero. Otherwise now is the time to do it.
432 */
433 if (!fl6->mp_hash)
b4bac172 434 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
b673d6cc 435
5e670d84 436 if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
3d709f69
IS
437 return match;
438
93c2fb25
DA
439 list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
440 fib6_siblings) {
5e670d84
DA
441 int nh_upper_bound;
442
443 nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
444 if (fl6->mp_hash > nh_upper_bound)
3d709f69
IS
445 continue;
446 if (rt6_score_route(sibling, oif, strict) < 0)
447 break;
448 match = sibling;
449 break;
450 }
451
51ebd318
ND
452 return match;
453}
454
1da177e4 455/*
66f5d6ce 456 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
457 */
458
8d1c802b
DA
459static inline struct fib6_info *rt6_device_match(struct net *net,
460 struct fib6_info *rt,
b71d1d42 461 const struct in6_addr *saddr,
1da177e4 462 int oif,
d420895e 463 int flags)
1da177e4 464{
8d1c802b 465 struct fib6_info *sprt;
1da177e4 466
5e670d84
DA
467 if (!oif && ipv6_addr_any(saddr) &&
468 !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
8067bb8c 469 return rt;
dd3abc4e 470
8fb11a9a 471 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
5e670d84 472 const struct net_device *dev = sprt->fib6_nh.nh_dev;
dd3abc4e 473
5e670d84 474 if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
475 continue;
476
dd3abc4e 477 if (oif) {
1da177e4
LT
478 if (dev->ifindex == oif)
479 return sprt;
dd3abc4e
YH
480 } else {
481 if (ipv6_chk_addr(net, saddr, dev,
482 flags & RT6_LOOKUP_F_IFACE))
483 return sprt;
1da177e4 484 }
dd3abc4e 485 }
1da177e4 486
eea68cd3
DA
487 if (oif && flags & RT6_LOOKUP_F_IFACE)
488 return net->ipv6.fib6_null_entry;
8067bb8c 489
421842ed 490 return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
1da177e4
LT
491}
492
#ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred probe request: target address plus the (held) device to use. */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;
	struct net_device *dev;
};

/* Work handler: send a neighbour solicitation for the stored target to its
 * solicited-node multicast address, then drop the device reference and
 * free the request.
 */
static void rt6_probe_deferred(struct work_struct *w)
{
	struct __rt6_probe_work *req =
		container_of(w, struct __rt6_probe_work, work);
	struct in6_addr mcaddr;

	addrconf_addr_solict_mult(&req->target, &mcaddr);
	ndisc_send_ns(req->dev, &req->target, &mcaddr, NULL, 0);
	dev_put(req->dev);
	kfree(req);
}

/* Schedule a reachability probe towards the gateway of @rt when needed.
 *
 * Nothing happens for a NULL route or a route without RTF_GATEWAY.  With an
 * existing neighbour entry, a probe is queued only if the entry is not
 * NUD_VALID and more than rtr_probe_interval has elapsed since its last
 * update.  Without a neighbour entry a probe is always queued (subject to
 * the allocation succeeding).
 */
static void rt6_probe(struct fib6_info *rt)
{
	struct __rt6_probe_work *req = NULL;
	const struct in6_addr *gw;
	struct net_device *dev;
	struct neighbour *neigh;

	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
		return;

	gw = &rt->fib6_nh.nh_gw;
	dev = rt->fib6_nh.nh_dev;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(dev, gw);
	if (!neigh) {
		req = kmalloc(sizeof(*req), GFP_ATOMIC);
	} else {
		struct inet6_dev *idev;

		if (neigh->nud_state & NUD_VALID)
			goto out;

		idev = __in6_dev_get(dev);

		write_lock(&neigh->lock);
		/* Re-check the state now that the neighbour lock is held. */
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated + idev->cnf.rtr_probe_interval)) {
			req = kmalloc(sizeof(*req), GFP_ATOMIC);
			if (req)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	}

	if (req) {
		INIT_WORK(&req->work, rt6_probe_deferred);
		req->target = *gw;
		dev_hold(dev);	/* released in rt6_probe_deferred() */
		req->dev = dev;
		schedule_work(&req->work);
	}

out:
	rcu_read_unlock_bh();
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct fib6_info *rt)
{
}
#endif
571
1da177e4 572/*
554cfb7e 573 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 574 */
8d1c802b 575static inline int rt6_check_dev(struct fib6_info *rt, int oif)
554cfb7e 576{
5e670d84
DA
577 const struct net_device *dev = rt->fib6_nh.nh_dev;
578
161980f4 579 if (!oif || dev->ifindex == oif)
554cfb7e 580 return 2;
161980f4 581 return 0;
554cfb7e 582}
1da177e4 583
8d1c802b 584static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
1da177e4 585{
afc154e9 586 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
5e670d84 587 struct neighbour *neigh;
f2c31e32 588
93c2fb25
DA
589 if (rt->fib6_flags & RTF_NONEXTHOP ||
590 !(rt->fib6_flags & RTF_GATEWAY))
afc154e9 591 return RT6_NUD_SUCCEED;
145a3621
YH
592
593 rcu_read_lock_bh();
5e670d84
DA
594 neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
595 &rt->fib6_nh.nh_gw);
145a3621
YH
596 if (neigh) {
597 read_lock(&neigh->lock);
554cfb7e 598 if (neigh->nud_state & NUD_VALID)
afc154e9 599 ret = RT6_NUD_SUCCEED;
398bcbeb 600#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 601 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 602 ret = RT6_NUD_SUCCEED;
7e980569
JB
603 else
604 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 605#endif
145a3621 606 read_unlock(&neigh->lock);
afc154e9
HFS
607 } else {
608 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 609 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 610 }
145a3621
YH
611 rcu_read_unlock_bh();
612
a5a81f0b 613 return ret;
1da177e4
LT
614}
615
8d1c802b 616static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
1da177e4 617{
a5a81f0b 618 int m;
1ab1457c 619
4d0c5911 620 m = rt6_check_dev(rt, oif);
77d16f45 621 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 622 return RT6_NUD_FAIL_HARD;
ebacaaa0 623#ifdef CONFIG_IPV6_ROUTER_PREF
93c2fb25 624 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
ebacaaa0 625#endif
afc154e9
HFS
626 if (strict & RT6_LOOKUP_F_REACHABLE) {
627 int n = rt6_check_neigh(rt);
628 if (n < 0)
629 return n;
630 }
554cfb7e
YH
631 return m;
632}
633
dcd1f572
DA
634/* called with rc_read_lock held */
635static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i)
636{
637 const struct net_device *dev = fib6_info_nh_dev(f6i);
638 bool rc = false;
639
640 if (dev) {
641 const struct inet6_dev *idev = __in6_dev_get(dev);
642
643 rc = !!idev->cnf.ignore_routes_with_linkdown;
644 }
645
646 return rc;
647}
648
8d1c802b
DA
649static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
650 int *mpri, struct fib6_info *match,
afc154e9 651 bool *do_rr)
554cfb7e 652{
f11e6659 653 int m;
afc154e9 654 bool match_do_rr = false;
35103d11 655
5e670d84 656 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
657 goto out;
658
dcd1f572 659 if (fib6_ignore_linkdown(rt) &&
5e670d84 660 rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 661 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 662 goto out;
f11e6659 663
14895687 664 if (fib6_check_expired(rt))
f11e6659
DM
665 goto out;
666
667 m = rt6_score_route(rt, oif, strict);
7e980569 668 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
669 match_do_rr = true;
670 m = 0; /* lowest valid score */
7e980569 671 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 672 goto out;
afc154e9
HFS
673 }
674
675 if (strict & RT6_LOOKUP_F_REACHABLE)
676 rt6_probe(rt);
f11e6659 677
7e980569 678 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 679 if (m > *mpri) {
afc154e9 680 *do_rr = match_do_rr;
f11e6659
DM
681 *mpri = m;
682 match = rt;
f11e6659 683 }
f11e6659
DM
684out:
685 return match;
686}
687
8d1c802b
DA
688static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
689 struct fib6_info *leaf,
690 struct fib6_info *rr_head,
afc154e9
HFS
691 u32 metric, int oif, int strict,
692 bool *do_rr)
f11e6659 693{
8d1c802b 694 struct fib6_info *rt, *match, *cont;
554cfb7e 695 int mpri = -1;
1da177e4 696
f11e6659 697 match = NULL;
9fbdcfaf 698 cont = NULL;
8fb11a9a 699 for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
93c2fb25 700 if (rt->fib6_metric != metric) {
9fbdcfaf
SK
701 cont = rt;
702 break;
703 }
704
705 match = find_match(rt, oif, strict, &mpri, match, do_rr);
706 }
707
66f5d6ce 708 for (rt = leaf; rt && rt != rr_head;
8fb11a9a 709 rt = rcu_dereference(rt->fib6_next)) {
93c2fb25 710 if (rt->fib6_metric != metric) {
9fbdcfaf
SK
711 cont = rt;
712 break;
713 }
714
afc154e9 715 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
716 }
717
718 if (match || !cont)
719 return match;
720
8fb11a9a 721 for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
afc154e9 722 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 723
f11e6659
DM
724 return match;
725}
1da177e4 726
8d1c802b 727static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
8d1040e8 728 int oif, int strict)
f11e6659 729{
8d1c802b
DA
730 struct fib6_info *leaf = rcu_dereference(fn->leaf);
731 struct fib6_info *match, *rt0;
afc154e9 732 bool do_rr = false;
17ecf590 733 int key_plen;
1da177e4 734
421842ed
DA
735 if (!leaf || leaf == net->ipv6.fib6_null_entry)
736 return net->ipv6.fib6_null_entry;
8d1040e8 737
66f5d6ce 738 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 739 if (!rt0)
66f5d6ce 740 rt0 = leaf;
1da177e4 741
17ecf590
WW
742 /* Double check to make sure fn is not an intermediate node
743 * and fn->leaf does not points to its child's leaf
744 * (This might happen if all routes under fn are deleted from
745 * the tree and fib6_repair_tree() is called on the node.)
746 */
93c2fb25 747 key_plen = rt0->fib6_dst.plen;
17ecf590 748#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
749 if (rt0->fib6_src.plen)
750 key_plen = rt0->fib6_src.plen;
17ecf590
WW
751#endif
752 if (fn->fn_bit != key_plen)
421842ed 753 return net->ipv6.fib6_null_entry;
17ecf590 754
93c2fb25 755 match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
afc154e9 756 &do_rr);
1da177e4 757
afc154e9 758 if (do_rr) {
8fb11a9a 759 struct fib6_info *next = rcu_dereference(rt0->fib6_next);
f11e6659 760
554cfb7e 761 /* no entries matched; do round-robin */
93c2fb25 762 if (!next || next->fib6_metric != rt0->fib6_metric)
8d1040e8 763 next = leaf;
f11e6659 764
66f5d6ce 765 if (next != rt0) {
93c2fb25 766 spin_lock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 767 /* make sure next is not being deleted from the tree */
93c2fb25 768 if (next->fib6_node)
66f5d6ce 769 rcu_assign_pointer(fn->rr_ptr, next);
93c2fb25 770 spin_unlock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 771 }
1da177e4 772 }
1da177e4 773
421842ed 774 return match ? match : net->ipv6.fib6_null_entry;
1da177e4
LT
775}
776
8d1c802b 777static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
8b9df265 778{
93c2fb25 779 return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
8b9df265
MKL
780}
781
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Process a route information option received on @dev from @gwaddr.
 *
 * @opt points at the option and @len is its length in bytes.  Returns
 * -EINVAL for a malformed option or an invalid preference, 0 otherwise.
 * A zero lifetime deletes an existing route; a non-zero lifetime creates
 * the route if needed or refreshes its preference, then updates its expiry.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct net *net = dev_net(dev);
	struct in6_addr prefix_buf, *prefix;
	struct fib6_info *rt;
	unsigned long lifetime;
	unsigned int pref;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(net, gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev);

	if (rt && !lifetime) {
		ip6_del_rt(net, rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev, pref);
	else if (rt)
		rt->fib6_flags = RTF_ROUTEINFO |
				 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (addrconf_finite_timeout(lifetime))
			fib6_set_expires(rt, jiffies + HZ * lifetime);
		else
			fib6_clean_expires(rt);

		fib6_info_release(rt);
	}

	return 0;
}
#endif
857
ae90d867
DA
858/*
859 * Misc support functions
860 */
861
862/* called with rcu_lock held */
8d1c802b 863static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
ae90d867 864{
5e670d84 865 struct net_device *dev = rt->fib6_nh.nh_dev;
ae90d867 866
93c2fb25 867 if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
ae90d867
DA
868 /* for copies of local routes, dst->dev needs to be the
869 * device if it is a master device, the master device if
870 * device is enslaved, and the loopback as the default
871 */
872 if (netif_is_l3_slave(dev) &&
93c2fb25 873 !rt6_need_strict(&rt->fib6_dst.addr))
ae90d867
DA
874 dev = l3mdev_master_dev_rcu(dev);
875 else if (!netif_is_l3_master(dev))
876 dev = dev_net(dev)->loopback_dev;
877 /* last case is netif_is_l3_master(dev) is true in which
878 * case we want dev returned to be dev
879 */
880 }
881
882 return dev;
883}
884
6edb3c96
DA
885static const int fib6_prop[RTN_MAX + 1] = {
886 [RTN_UNSPEC] = 0,
887 [RTN_UNICAST] = 0,
888 [RTN_LOCAL] = 0,
889 [RTN_BROADCAST] = 0,
890 [RTN_ANYCAST] = 0,
891 [RTN_MULTICAST] = 0,
892 [RTN_BLACKHOLE] = -EINVAL,
893 [RTN_UNREACHABLE] = -EHOSTUNREACH,
894 [RTN_PROHIBIT] = -EACCES,
895 [RTN_THROW] = -EAGAIN,
896 [RTN_NAT] = -EINVAL,
897 [RTN_XRESOLVE] = -EINVAL,
898};
899
900static int ip6_rt_type_to_error(u8 fib6_type)
901{
902 return fib6_prop[fib6_type];
903}
904
8d1c802b 905static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
3b6761d1
DA
906{
907 unsigned short flags = 0;
908
909 if (rt->dst_nocount)
910 flags |= DST_NOCOUNT;
911 if (rt->dst_nopolicy)
912 flags |= DST_NOPOLICY;
913 if (rt->dst_host)
914 flags |= DST_HOST;
915
916 return flags;
917}
918
8d1c802b 919static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96
DA
920{
921 rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
922
923 switch (ort->fib6_type) {
924 case RTN_BLACKHOLE:
925 rt->dst.output = dst_discard_out;
926 rt->dst.input = dst_discard;
927 break;
928 case RTN_PROHIBIT:
929 rt->dst.output = ip6_pkt_prohibit_out;
930 rt->dst.input = ip6_pkt_prohibit;
931 break;
932 case RTN_THROW:
933 case RTN_UNREACHABLE:
934 default:
935 rt->dst.output = ip6_pkt_discard_out;
936 rt->dst.input = ip6_pkt_discard;
937 break;
938 }
939}
940
8d1c802b 941static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96 942{
3b6761d1
DA
943 rt->dst.flags |= fib6_info_dst_flags(ort);
944
93c2fb25 945 if (ort->fib6_flags & RTF_REJECT) {
6edb3c96
DA
946 ip6_rt_init_dst_reject(rt, ort);
947 return;
948 }
949
950 rt->dst.error = 0;
951 rt->dst.output = ip6_output;
952
953 if (ort->fib6_type == RTN_LOCAL) {
6edb3c96 954 rt->dst.input = ip6_input;
93c2fb25 955 } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
6edb3c96
DA
956 rt->dst.input = ip6_mc_input;
957 } else {
958 rt->dst.input = ip6_forward;
959 }
960
961 if (ort->fib6_nh.nh_lwtstate) {
962 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
963 lwtunnel_set_redirect(&rt->dst);
964 }
965
966 rt->dst.lastuse = jiffies;
967}
968
8d1c802b 969static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
ae90d867 970{
ae90d867 971 rt->rt6i_flags &= ~RTF_EXPIRES;
93531c67 972 fib6_info_hold(from);
a68886a6 973 rcu_assign_pointer(rt->from, from);
d4ead6b3
DA
974 dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
975 if (from->fib6_metrics != &dst_default_metrics) {
976 rt->dst._metrics |= DST_METRICS_REFCOUNTED;
977 refcount_inc(&from->fib6_metrics->refcnt);
978 }
ae90d867
DA
979}
980
8d1c802b 981static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
ae90d867 982{
dcd1f572
DA
983 struct net_device *dev = fib6_info_nh_dev(ort);
984
6edb3c96
DA
985 ip6_rt_init_dst(rt, ort);
986
93c2fb25 987 rt->rt6i_dst = ort->fib6_dst;
dcd1f572 988 rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
5e670d84 989 rt->rt6i_gateway = ort->fib6_nh.nh_gw;
93c2fb25 990 rt->rt6i_flags = ort->fib6_flags;
ae90d867 991 rt6_set_from(rt, ort);
ae90d867 992#ifdef CONFIG_IPV6_SUBTREES
93c2fb25 993 rt->rt6i_src = ort->fib6_src;
ae90d867 994#endif
93c2fb25 995 rt->rt6i_prefsrc = ort->fib6_prefsrc;
5e670d84 996 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
ae90d867
DA
997}
998
a3c00e46
MKL
999static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1000 struct in6_addr *saddr)
1001{
66f5d6ce 1002 struct fib6_node *pn, *sn;
a3c00e46
MKL
1003 while (1) {
1004 if (fn->fn_flags & RTN_TL_ROOT)
1005 return NULL;
66f5d6ce
WW
1006 pn = rcu_dereference(fn->parent);
1007 sn = FIB6_SUBTREE(pn);
1008 if (sn && sn != fn)
1009 fn = fib6_lookup(sn, NULL, saddr);
a3c00e46
MKL
1010 else
1011 fn = pn;
1012 if (fn->fn_flags & RTN_RTINFO)
1013 return fn;
1014 }
1015}
c71099ac 1016
d3843fe5
WW
1017static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
1018 bool null_fallback)
1019{
1020 struct rt6_info *rt = *prt;
1021
1022 if (dst_hold_safe(&rt->dst))
1023 return true;
1024 if (null_fallback) {
1025 rt = net->ipv6.ip6_null_entry;
1026 dst_hold(&rt->dst);
1027 } else {
1028 rt = NULL;
1029 }
1030 *prt = rt;
1031 return false;
1032}
1033
dec9b0e2 1034/* called with rcu_lock held */
8d1c802b 1035static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
dec9b0e2 1036{
3b6761d1 1037 unsigned short flags = fib6_info_dst_flags(rt);
dec9b0e2
DA
1038 struct net_device *dev = rt->fib6_nh.nh_dev;
1039 struct rt6_info *nrt;
1040
93531c67 1041 nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
dec9b0e2
DA
1042 if (nrt)
1043 ip6_rt_copy_init(nrt, rt);
1044
1045 return nrt;
1046}
1047
8ed67789
DL
1048static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1049 struct fib6_table *table,
b75cc8f9
DA
1050 struct flowi6 *fl6,
1051 const struct sk_buff *skb,
1052 int flags)
1da177e4 1053{
8d1c802b 1054 struct fib6_info *f6i;
1da177e4 1055 struct fib6_node *fn;
23fb93a4 1056 struct rt6_info *rt;
1da177e4 1057
b6cdbc85
DA
1058 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1059 flags &= ~RT6_LOOKUP_F_IFACE;
1060
66f5d6ce 1061 rcu_read_lock();
4c9483b2 1062 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 1063restart:
23fb93a4
DA
1064 f6i = rcu_dereference(fn->leaf);
1065 if (!f6i) {
1066 f6i = net->ipv6.fib6_null_entry;
66f5d6ce 1067 } else {
23fb93a4 1068 f6i = rt6_device_match(net, f6i, &fl6->saddr,
66f5d6ce 1069 fl6->flowi6_oif, flags);
93c2fb25 1070 if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
23fb93a4
DA
1071 f6i = rt6_multipath_select(net, f6i, fl6,
1072 fl6->flowi6_oif, skb, flags);
66f5d6ce 1073 }
23fb93a4 1074 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1075 fn = fib6_backtrack(fn, &fl6->saddr);
1076 if (fn)
1077 goto restart;
1078 }
23fb93a4 1079
2b760fcf 1080 /* Search through exception table */
23fb93a4
DA
1081 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1082 if (rt) {
dec9b0e2
DA
1083 if (ip6_hold_safe(net, &rt, true))
1084 dst_use_noref(&rt->dst, jiffies);
23fb93a4 1085 } else if (f6i == net->ipv6.fib6_null_entry) {
dec9b0e2
DA
1086 rt = net->ipv6.ip6_null_entry;
1087 dst_hold(&rt->dst);
23fb93a4
DA
1088 } else {
1089 rt = ip6_create_rt_rcu(f6i);
1090 if (!rt) {
1091 rt = net->ipv6.ip6_null_entry;
1092 dst_hold(&rt->dst);
1093 }
dec9b0e2 1094 }
d3843fe5 1095
66f5d6ce 1096 rcu_read_unlock();
b811580d 1097
b65f164d 1098 trace_fib6_table_lookup(net, rt, table, fl6);
b811580d 1099
c71099ac 1100 return rt;
c71099ac
TG
1101}
1102
67ba4152 1103struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 1104 const struct sk_buff *skb, int flags)
ea6e574e 1105{
b75cc8f9 1106 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
ea6e574e
FW
1107}
1108EXPORT_SYMBOL_GPL(ip6_route_lookup);
1109
9acd9f3a 1110struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
b75cc8f9
DA
1111 const struct in6_addr *saddr, int oif,
1112 const struct sk_buff *skb, int strict)
c71099ac 1113{
4c9483b2
DM
1114 struct flowi6 fl6 = {
1115 .flowi6_oif = oif,
1116 .daddr = *daddr,
c71099ac
TG
1117 };
1118 struct dst_entry *dst;
77d16f45 1119 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 1120
adaa70bb 1121 if (saddr) {
4c9483b2 1122 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
1123 flags |= RT6_LOOKUP_F_HAS_SADDR;
1124 }
1125
b75cc8f9 1126 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
c71099ac
TG
1127 if (dst->error == 0)
1128 return (struct rt6_info *) dst;
1129
1130 dst_release(dst);
1131
1da177e4
LT
1132 return NULL;
1133}
7159039a
YH
1134EXPORT_SYMBOL(rt6_lookup);
1135
c71099ac 1136/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
1137 * It takes new route entry, the addition fails by any reason the
1138 * route is released.
1139 * Caller must hold dst before calling it.
1da177e4
LT
1140 */
1141
8d1c802b 1142static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
333c4301 1143 struct netlink_ext_ack *extack)
1da177e4
LT
1144{
1145 int err;
c71099ac 1146 struct fib6_table *table;
1da177e4 1147
93c2fb25 1148 table = rt->fib6_table;
66f5d6ce 1149 spin_lock_bh(&table->tb6_lock);
d4ead6b3 1150 err = fib6_add(&table->tb6_root, rt, info, extack);
66f5d6ce 1151 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1152
1153 return err;
1154}
1155
8d1c802b 1156int ip6_ins_rt(struct net *net, struct fib6_info *rt)
40e22e8f 1157{
afb1d4b5 1158 struct nl_info info = { .nl_net = net, };
e715b6d3 1159
d4ead6b3 1160 return __ip6_ins_rt(rt, &info, NULL);
40e22e8f
TG
1161}
1162
8d1c802b 1163static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
8b9df265
MKL
1164 const struct in6_addr *daddr,
1165 const struct in6_addr *saddr)
1da177e4 1166{
4832c30d 1167 struct net_device *dev;
1da177e4
LT
1168 struct rt6_info *rt;
1169
1170 /*
1171 * Clone the route.
1172 */
1173
4832c30d 1174 dev = ip6_rt_get_dev_rcu(ort);
93531c67 1175 rt = ip6_dst_alloc(dev_net(dev), dev, 0);
83a09abd
MKL
1176 if (!rt)
1177 return NULL;
1178
1179 ip6_rt_copy_init(rt, ort);
1180 rt->rt6i_flags |= RTF_CACHE;
83a09abd
MKL
1181 rt->dst.flags |= DST_HOST;
1182 rt->rt6i_dst.addr = *daddr;
1183 rt->rt6i_dst.plen = 128;
1da177e4 1184
83a09abd 1185 if (!rt6_is_gw_or_nonexthop(ort)) {
93c2fb25
DA
1186 if (ort->fib6_dst.plen != 128 &&
1187 ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
83a09abd 1188 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1189#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1190 if (rt->rt6i_src.plen && saddr) {
1191 rt->rt6i_src.addr = *saddr;
1192 rt->rt6i_src.plen = 128;
8b9df265 1193 }
83a09abd 1194#endif
95a9a5ba 1195 }
1da177e4 1196
95a9a5ba
YH
1197 return rt;
1198}
1da177e4 1199
8d1c802b 1200static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
d52d3997 1201{
3b6761d1 1202 unsigned short flags = fib6_info_dst_flags(rt);
4832c30d 1203 struct net_device *dev;
d52d3997
MKL
1204 struct rt6_info *pcpu_rt;
1205
4832c30d
DA
1206 rcu_read_lock();
1207 dev = ip6_rt_get_dev_rcu(rt);
93531c67 1208 pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
4832c30d 1209 rcu_read_unlock();
d52d3997
MKL
1210 if (!pcpu_rt)
1211 return NULL;
1212 ip6_rt_copy_init(pcpu_rt, rt);
d52d3997
MKL
1213 pcpu_rt->rt6i_flags |= RTF_PCPU;
1214 return pcpu_rt;
1215}
1216
66f5d6ce 1217/* It should be called with rcu_read_lock() acquired */
8d1c802b 1218static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
d52d3997 1219{
a73e4195 1220 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1221
1222 p = this_cpu_ptr(rt->rt6i_pcpu);
1223 pcpu_rt = *p;
1224
d4ead6b3
DA
1225 if (pcpu_rt)
1226 ip6_hold_safe(NULL, &pcpu_rt, false);
d3843fe5 1227
a73e4195
MKL
1228 return pcpu_rt;
1229}
1230
afb1d4b5 1231static struct rt6_info *rt6_make_pcpu_route(struct net *net,
8d1c802b 1232 struct fib6_info *rt)
a73e4195
MKL
1233{
1234 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1235
1236 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1237 if (!pcpu_rt) {
9c7370a1
MKL
1238 dst_hold(&net->ipv6.ip6_null_entry->dst);
1239 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1240 }
1241
a94b9367
WW
1242 dst_hold(&pcpu_rt->dst);
1243 p = this_cpu_ptr(rt->rt6i_pcpu);
1244 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1245 BUG_ON(prev);
a94b9367 1246
d52d3997
MKL
1247 return pcpu_rt;
1248}
1249
/* Exception (cached route) hash table implementation.
 *
 * rt6_exception_lock is the spinlock the functions below take, or require
 * their caller to hold, while modifying an exception bucket list.
 */
static DEFINE_SPINLOCK(rt6_exception_lock);
1253
1254/* Remove rt6_ex from hash table and free the memory
1255 * Caller must hold rt6_exception_lock
1256 */
1257static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1258 struct rt6_exception *rt6_ex)
1259{
b2427e67 1260 struct net *net;
81eb8447 1261
35732d01
WW
1262 if (!bucket || !rt6_ex)
1263 return;
b2427e67
CIK
1264
1265 net = dev_net(rt6_ex->rt6i->dst.dev);
35732d01 1266 hlist_del_rcu(&rt6_ex->hlist);
77634cc6 1267 dst_release(&rt6_ex->rt6i->dst);
35732d01
WW
1268 kfree_rcu(rt6_ex, rcu);
1269 WARN_ON_ONCE(!bucket->depth);
1270 bucket->depth--;
81eb8447 1271 net->ipv6.rt6_stats->fib_rt_cache--;
35732d01
WW
1272}
1273
1274/* Remove oldest rt6_ex in bucket and free the memory
1275 * Caller must hold rt6_exception_lock
1276 */
1277static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1278{
1279 struct rt6_exception *rt6_ex, *oldest = NULL;
1280
1281 if (!bucket)
1282 return;
1283
1284 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1285 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1286 oldest = rt6_ex;
1287 }
1288 rt6_remove_exception(bucket, oldest);
1289}
1290
1291static u32 rt6_exception_hash(const struct in6_addr *dst,
1292 const struct in6_addr *src)
1293{
1294 static u32 seed __read_mostly;
1295 u32 val;
1296
1297 net_get_random_once(&seed, sizeof(seed));
1298 val = jhash(dst, sizeof(*dst), seed);
1299
1300#ifdef CONFIG_IPV6_SUBTREES
1301 if (src)
1302 val = jhash(src, sizeof(*src), val);
1303#endif
1304 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1305}
1306
1307/* Helper function to find the cached rt in the hash table
1308 * and update bucket pointer to point to the bucket for this
1309 * (daddr, saddr) pair
1310 * Caller must hold rt6_exception_lock
1311 */
1312static struct rt6_exception *
1313__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1314 const struct in6_addr *daddr,
1315 const struct in6_addr *saddr)
1316{
1317 struct rt6_exception *rt6_ex;
1318 u32 hval;
1319
1320 if (!(*bucket) || !daddr)
1321 return NULL;
1322
1323 hval = rt6_exception_hash(daddr, saddr);
1324 *bucket += hval;
1325
1326 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1327 struct rt6_info *rt6 = rt6_ex->rt6i;
1328 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1329
1330#ifdef CONFIG_IPV6_SUBTREES
1331 if (matched && saddr)
1332 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1333#endif
1334 if (matched)
1335 return rt6_ex;
1336 }
1337 return NULL;
1338}
1339
1340/* Helper function to find the cached rt in the hash table
1341 * and update bucket pointer to point to the bucket for this
1342 * (daddr, saddr) pair
1343 * Caller must hold rcu_read_lock()
1344 */
1345static struct rt6_exception *
1346__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1347 const struct in6_addr *daddr,
1348 const struct in6_addr *saddr)
1349{
1350 struct rt6_exception *rt6_ex;
1351 u32 hval;
1352
1353 WARN_ON_ONCE(!rcu_read_lock_held());
1354
1355 if (!(*bucket) || !daddr)
1356 return NULL;
1357
1358 hval = rt6_exception_hash(daddr, saddr);
1359 *bucket += hval;
1360
1361 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1362 struct rt6_info *rt6 = rt6_ex->rt6i;
1363 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1364
1365#ifdef CONFIG_IPV6_SUBTREES
1366 if (matched && saddr)
1367 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1368#endif
1369 if (matched)
1370 return rt6_ex;
1371 }
1372 return NULL;
1373}
1374
8d1c802b 1375static unsigned int fib6_mtu(const struct fib6_info *rt)
d4ead6b3
DA
1376{
1377 unsigned int mtu;
1378
dcd1f572
DA
1379 if (rt->fib6_pmtu) {
1380 mtu = rt->fib6_pmtu;
1381 } else {
1382 struct net_device *dev = fib6_info_nh_dev(rt);
1383 struct inet6_dev *idev;
1384
1385 rcu_read_lock();
1386 idev = __in6_dev_get(dev);
1387 mtu = idev->cnf.mtu6;
1388 rcu_read_unlock();
1389 }
1390
d4ead6b3
DA
1391 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1392
1393 return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1394}
1395
35732d01 1396static int rt6_insert_exception(struct rt6_info *nrt,
8d1c802b 1397 struct fib6_info *ort)
35732d01 1398{
5e670d84 1399 struct net *net = dev_net(nrt->dst.dev);
35732d01
WW
1400 struct rt6_exception_bucket *bucket;
1401 struct in6_addr *src_key = NULL;
1402 struct rt6_exception *rt6_ex;
1403 int err = 0;
1404
35732d01
WW
1405 spin_lock_bh(&rt6_exception_lock);
1406
1407 if (ort->exception_bucket_flushed) {
1408 err = -EINVAL;
1409 goto out;
1410 }
1411
1412 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1413 lockdep_is_held(&rt6_exception_lock));
1414 if (!bucket) {
1415 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1416 GFP_ATOMIC);
1417 if (!bucket) {
1418 err = -ENOMEM;
1419 goto out;
1420 }
1421 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1422 }
1423
1424#ifdef CONFIG_IPV6_SUBTREES
1425 /* rt6i_src.plen != 0 indicates ort is in subtree
1426 * and exception table is indexed by a hash of
1427 * both rt6i_dst and rt6i_src.
1428 * Otherwise, the exception table is indexed by
1429 * a hash of only rt6i_dst.
1430 */
93c2fb25 1431 if (ort->fib6_src.plen)
35732d01
WW
1432 src_key = &nrt->rt6i_src.addr;
1433#endif
60006a48
WW
1434
1435 /* Update rt6i_prefsrc as it could be changed
1436 * in rt6_remove_prefsrc()
1437 */
93c2fb25 1438 nrt->rt6i_prefsrc = ort->fib6_prefsrc;
f5bbe7ee
WW
1439 /* rt6_mtu_change() might lower mtu on ort.
1440 * Only insert this exception route if its mtu
1441 * is less than ort's mtu value.
1442 */
d4ead6b3 1443 if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
f5bbe7ee
WW
1444 err = -EINVAL;
1445 goto out;
1446 }
60006a48 1447
35732d01
WW
1448 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1449 src_key);
1450 if (rt6_ex)
1451 rt6_remove_exception(bucket, rt6_ex);
1452
1453 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1454 if (!rt6_ex) {
1455 err = -ENOMEM;
1456 goto out;
1457 }
1458 rt6_ex->rt6i = nrt;
1459 rt6_ex->stamp = jiffies;
35732d01
WW
1460 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1461 bucket->depth++;
81eb8447 1462 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1463
1464 if (bucket->depth > FIB6_MAX_DEPTH)
1465 rt6_exception_remove_oldest(bucket);
1466
1467out:
1468 spin_unlock_bh(&rt6_exception_lock);
1469
1470 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1471 if (!err) {
93c2fb25 1472 spin_lock_bh(&ort->fib6_table->tb6_lock);
7aef6859 1473 fib6_update_sernum(net, ort);
93c2fb25 1474 spin_unlock_bh(&ort->fib6_table->tb6_lock);
b886d5f2
PA
1475 fib6_force_start_gc(net);
1476 }
35732d01
WW
1477
1478 return err;
1479}
1480
8d1c802b 1481void rt6_flush_exceptions(struct fib6_info *rt)
35732d01
WW
1482{
1483 struct rt6_exception_bucket *bucket;
1484 struct rt6_exception *rt6_ex;
1485 struct hlist_node *tmp;
1486 int i;
1487
1488 spin_lock_bh(&rt6_exception_lock);
1489 /* Prevent rt6_insert_exception() to recreate the bucket list */
1490 rt->exception_bucket_flushed = 1;
1491
1492 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1493 lockdep_is_held(&rt6_exception_lock));
1494 if (!bucket)
1495 goto out;
1496
1497 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1498 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1499 rt6_remove_exception(bucket, rt6_ex);
1500 WARN_ON_ONCE(bucket->depth);
1501 bucket++;
1502 }
1503
1504out:
1505 spin_unlock_bh(&rt6_exception_lock);
1506}
1507
1508/* Find cached rt in the hash table inside passed in rt
1509 * Caller has to hold rcu_read_lock()
1510 */
8d1c802b 1511static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
1512 struct in6_addr *daddr,
1513 struct in6_addr *saddr)
1514{
1515 struct rt6_exception_bucket *bucket;
1516 struct in6_addr *src_key = NULL;
1517 struct rt6_exception *rt6_ex;
1518 struct rt6_info *res = NULL;
1519
1520 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1521
1522#ifdef CONFIG_IPV6_SUBTREES
1523 /* rt6i_src.plen != 0 indicates rt is in subtree
1524 * and exception table is indexed by a hash of
1525 * both rt6i_dst and rt6i_src.
1526 * Otherwise, the exception table is indexed by
1527 * a hash of only rt6i_dst.
1528 */
93c2fb25 1529 if (rt->fib6_src.plen)
35732d01
WW
1530 src_key = saddr;
1531#endif
1532 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1533
1534 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1535 res = rt6_ex->rt6i;
1536
1537 return res;
1538}
1539
1540/* Remove the passed in cached rt from the hash table that contains it */
23fb93a4 1541static int rt6_remove_exception_rt(struct rt6_info *rt)
35732d01 1542{
35732d01
WW
1543 struct rt6_exception_bucket *bucket;
1544 struct in6_addr *src_key = NULL;
1545 struct rt6_exception *rt6_ex;
8a14e46f 1546 struct fib6_info *from;
35732d01
WW
1547 int err;
1548
091311de 1549 from = rcu_dereference(rt->from);
35732d01 1550 if (!from ||
442d713b 1551 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1552 return -EINVAL;
1553
1554 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1555 return -ENOENT;
1556
1557 spin_lock_bh(&rt6_exception_lock);
1558 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1559 lockdep_is_held(&rt6_exception_lock));
1560#ifdef CONFIG_IPV6_SUBTREES
1561 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1562 * and exception table is indexed by a hash of
1563 * both rt6i_dst and rt6i_src.
1564 * Otherwise, the exception table is indexed by
1565 * a hash of only rt6i_dst.
1566 */
93c2fb25 1567 if (from->fib6_src.plen)
35732d01
WW
1568 src_key = &rt->rt6i_src.addr;
1569#endif
1570 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1571 &rt->rt6i_dst.addr,
1572 src_key);
1573 if (rt6_ex) {
1574 rt6_remove_exception(bucket, rt6_ex);
1575 err = 0;
1576 } else {
1577 err = -ENOENT;
1578 }
1579
1580 spin_unlock_bh(&rt6_exception_lock);
1581 return err;
1582}
1583
1584/* Find rt6_ex which contains the passed in rt cache and
1585 * refresh its stamp
1586 */
1587static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1588{
35732d01 1589 struct rt6_exception_bucket *bucket;
8d1c802b 1590 struct fib6_info *from = rt->from;
35732d01
WW
1591 struct in6_addr *src_key = NULL;
1592 struct rt6_exception *rt6_ex;
1593
1594 if (!from ||
442d713b 1595 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1596 return;
1597
1598 rcu_read_lock();
1599 bucket = rcu_dereference(from->rt6i_exception_bucket);
1600
1601#ifdef CONFIG_IPV6_SUBTREES
1602 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1603 * and exception table is indexed by a hash of
1604 * both rt6i_dst and rt6i_src.
1605 * Otherwise, the exception table is indexed by
1606 * a hash of only rt6i_dst.
1607 */
93c2fb25 1608 if (from->fib6_src.plen)
35732d01
WW
1609 src_key = &rt->rt6i_src.addr;
1610#endif
1611 rt6_ex = __rt6_find_exception_rcu(&bucket,
1612 &rt->rt6i_dst.addr,
1613 src_key);
1614 if (rt6_ex)
1615 rt6_ex->stamp = jiffies;
1616
1617 rcu_read_unlock();
1618}
1619
8d1c802b 1620static void rt6_exceptions_remove_prefsrc(struct fib6_info *rt)
60006a48
WW
1621{
1622 struct rt6_exception_bucket *bucket;
1623 struct rt6_exception *rt6_ex;
1624 int i;
1625
1626 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1627 lockdep_is_held(&rt6_exception_lock));
1628
1629 if (bucket) {
1630 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1631 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1632 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1633 }
1634 bucket++;
1635 }
1636 }
1637}
1638
e9fa1495
SB
1639static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1640 struct rt6_info *rt, int mtu)
1641{
1642 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1643 * lowest MTU in the path: always allow updating the route PMTU to
1644 * reflect PMTU decreases.
1645 *
1646 * If the new MTU is higher, and the route PMTU is equal to the local
1647 * MTU, this means the old MTU is the lowest in the path, so allow
1648 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1649 * handle this.
1650 */
1651
1652 if (dst_mtu(&rt->dst) >= mtu)
1653 return true;
1654
1655 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1656 return true;
1657
1658 return false;
1659}
1660
1661static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
8d1c802b 1662 struct fib6_info *rt, int mtu)
f5bbe7ee
WW
1663{
1664 struct rt6_exception_bucket *bucket;
1665 struct rt6_exception *rt6_ex;
1666 int i;
1667
1668 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1669 lockdep_is_held(&rt6_exception_lock));
1670
e9fa1495
SB
1671 if (!bucket)
1672 return;
1673
1674 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1675 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1676 struct rt6_info *entry = rt6_ex->rt6i;
1677
1678 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
d4ead6b3 1679 * route), the metrics of its rt->from have already
e9fa1495
SB
1680 * been updated.
1681 */
d4ead6b3 1682 if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
e9fa1495 1683 rt6_mtu_change_route_allowed(idev, entry, mtu))
d4ead6b3 1684 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
f5bbe7ee 1685 }
e9fa1495 1686 bucket++;
f5bbe7ee
WW
1687 }
1688}
1689
b16cb459
WW
1690#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1691
8d1c802b 1692static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
b16cb459
WW
1693 struct in6_addr *gateway)
1694{
1695 struct rt6_exception_bucket *bucket;
1696 struct rt6_exception *rt6_ex;
1697 struct hlist_node *tmp;
1698 int i;
1699
1700 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1701 return;
1702
1703 spin_lock_bh(&rt6_exception_lock);
1704 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1705 lockdep_is_held(&rt6_exception_lock));
1706
1707 if (bucket) {
1708 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1709 hlist_for_each_entry_safe(rt6_ex, tmp,
1710 &bucket->chain, hlist) {
1711 struct rt6_info *entry = rt6_ex->rt6i;
1712
1713 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1714 RTF_CACHE_GATEWAY &&
1715 ipv6_addr_equal(gateway,
1716 &entry->rt6i_gateway)) {
1717 rt6_remove_exception(bucket, rt6_ex);
1718 }
1719 }
1720 bucket++;
1721 }
1722 }
1723
1724 spin_unlock_bh(&rt6_exception_lock);
1725}
1726
c757faa8
WW
1727static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1728 struct rt6_exception *rt6_ex,
1729 struct fib6_gc_args *gc_args,
1730 unsigned long now)
1731{
1732 struct rt6_info *rt = rt6_ex->rt6i;
1733
1859bac0
PA
1734 /* we are pruning and obsoleting aged-out and non gateway exceptions
1735 * even if others have still references to them, so that on next
1736 * dst_check() such references can be dropped.
1737 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1738 * expired, independently from their aging, as per RFC 8201 section 4
1739 */
31afeb42
WW
1740 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1741 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1742 RT6_TRACE("aging clone %p\n", rt);
1743 rt6_remove_exception(bucket, rt6_ex);
1744 return;
1745 }
1746 } else if (time_after(jiffies, rt->dst.expires)) {
1747 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1748 rt6_remove_exception(bucket, rt6_ex);
1749 return;
31afeb42
WW
1750 }
1751
1752 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1753 struct neighbour *neigh;
1754 __u8 neigh_flags = 0;
1755
1bfa26ff
ED
1756 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1757 if (neigh)
c757faa8 1758 neigh_flags = neigh->flags;
1bfa26ff 1759
c757faa8
WW
1760 if (!(neigh_flags & NTF_ROUTER)) {
1761 RT6_TRACE("purging route %p via non-router but gateway\n",
1762 rt);
1763 rt6_remove_exception(bucket, rt6_ex);
1764 return;
1765 }
1766 }
31afeb42 1767
c757faa8
WW
1768 gc_args->more++;
1769}
1770
8d1c802b 1771void rt6_age_exceptions(struct fib6_info *rt,
c757faa8
WW
1772 struct fib6_gc_args *gc_args,
1773 unsigned long now)
1774{
1775 struct rt6_exception_bucket *bucket;
1776 struct rt6_exception *rt6_ex;
1777 struct hlist_node *tmp;
1778 int i;
1779
1780 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1781 return;
1782
1bfa26ff
ED
1783 rcu_read_lock_bh();
1784 spin_lock(&rt6_exception_lock);
c757faa8
WW
1785 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1786 lockdep_is_held(&rt6_exception_lock));
1787
1788 if (bucket) {
1789 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1790 hlist_for_each_entry_safe(rt6_ex, tmp,
1791 &bucket->chain, hlist) {
1792 rt6_age_examine_exception(bucket, rt6_ex,
1793 gc_args, now);
1794 }
1795 bucket++;
1796 }
1797 }
1bfa26ff
ED
1798 spin_unlock(&rt6_exception_lock);
1799 rcu_read_unlock_bh();
c757faa8
WW
1800}
1801
9ff74384 1802struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
b75cc8f9
DA
1803 int oif, struct flowi6 *fl6,
1804 const struct sk_buff *skb, int flags)
1da177e4 1805{
367efcb9 1806 struct fib6_node *fn, *saved_fn;
8d1c802b 1807 struct fib6_info *f6i;
23fb93a4 1808 struct rt6_info *rt;
c71099ac 1809 int strict = 0;
1da177e4 1810
77d16f45 1811 strict |= flags & RT6_LOOKUP_F_IFACE;
d5d32e4b 1812 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
367efcb9
MKL
1813 if (net->ipv6.devconf_all->forwarding == 0)
1814 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1815
66f5d6ce 1816 rcu_read_lock();
1da177e4 1817
4c9483b2 1818 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1819 saved_fn = fn;
1da177e4 1820
ca254490
DA
1821 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1822 oif = 0;
1823
a3c00e46 1824redo_rt6_select:
23fb93a4 1825 f6i = rt6_select(net, fn, oif, strict);
93c2fb25 1826 if (f6i->fib6_nsiblings)
23fb93a4
DA
1827 f6i = rt6_multipath_select(net, f6i, fl6, oif, skb, strict);
1828 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1829 fn = fib6_backtrack(fn, &fl6->saddr);
1830 if (fn)
1831 goto redo_rt6_select;
367efcb9
MKL
1832 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1833 /* also consider unreachable route */
1834 strict &= ~RT6_LOOKUP_F_REACHABLE;
1835 fn = saved_fn;
1836 goto redo_rt6_select;
367efcb9 1837 }
a3c00e46
MKL
1838 }
1839
23fb93a4 1840 if (f6i == net->ipv6.fib6_null_entry) {
421842ed 1841 rt = net->ipv6.ip6_null_entry;
66f5d6ce 1842 rcu_read_unlock();
d3843fe5 1843 dst_hold(&rt->dst);
b65f164d 1844 trace_fib6_table_lookup(net, rt, table, fl6);
d3843fe5 1845 return rt;
23fb93a4
DA
1846 }
1847
1848 /*Search through exception table */
1849 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1850 if (rt) {
d4ead6b3 1851 if (ip6_hold_safe(net, &rt, true))
d3843fe5 1852 dst_use_noref(&rt->dst, jiffies);
d4ead6b3 1853
66f5d6ce 1854 rcu_read_unlock();
b65f164d 1855 trace_fib6_table_lookup(net, rt, table, fl6);
d52d3997 1856 return rt;
3da59bd9 1857 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
93c2fb25 1858 !(f6i->fib6_flags & RTF_GATEWAY))) {
3da59bd9
MKL
1859 /* Create a RTF_CACHE clone which will not be
1860 * owned by the fib6 tree. It is for the special case where
1861 * the daddr in the skb during the neighbor look-up is different
1862 * from the fl6->daddr used to look-up route here.
1863 */
3da59bd9
MKL
1864 struct rt6_info *uncached_rt;
1865
23fb93a4 1866 uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
4d85cd0c
DA
1867
1868 rcu_read_unlock();
c71099ac 1869
1cfb71ee
WW
1870 if (uncached_rt) {
1871 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1872 * No need for another dst_hold()
1873 */
8d0b94af 1874 rt6_uncached_list_add(uncached_rt);
81eb8447 1875 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1876 } else {
3da59bd9 1877 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1878 dst_hold(&uncached_rt->dst);
1879 }
b811580d 1880
b65f164d 1881 trace_fib6_table_lookup(net, uncached_rt, table, fl6);
3da59bd9 1882 return uncached_rt;
3da59bd9 1883
d52d3997
MKL
1884 } else {
1885 /* Get a percpu copy */
1886
1887 struct rt6_info *pcpu_rt;
1888
951f788a 1889 local_bh_disable();
23fb93a4 1890 pcpu_rt = rt6_get_pcpu_route(f6i);
d52d3997 1891
93531c67
DA
1892 if (!pcpu_rt)
1893 pcpu_rt = rt6_make_pcpu_route(net, f6i);
1894
951f788a
ED
1895 local_bh_enable();
1896 rcu_read_unlock();
b65f164d 1897 trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
d52d3997
MKL
1898 return pcpu_rt;
1899 }
1da177e4 1900}
9ff74384 1901EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1902
b75cc8f9
DA
1903static struct rt6_info *ip6_pol_route_input(struct net *net,
1904 struct fib6_table *table,
1905 struct flowi6 *fl6,
1906 const struct sk_buff *skb,
1907 int flags)
4acad72d 1908{
b75cc8f9 1909 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
4acad72d
PE
1910}
1911
d409b847
MB
1912struct dst_entry *ip6_route_input_lookup(struct net *net,
1913 struct net_device *dev,
b75cc8f9
DA
1914 struct flowi6 *fl6,
1915 const struct sk_buff *skb,
1916 int flags)
72331bc0
SL
1917{
1918 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1919 flags |= RT6_LOOKUP_F_IFACE;
1920
b75cc8f9 1921 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
72331bc0 1922}
d409b847 1923EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1924
23aebdac 1925static void ip6_multipath_l3_keys(const struct sk_buff *skb,
5e5d6fed
RP
1926 struct flow_keys *keys,
1927 struct flow_keys *flkeys)
23aebdac
JS
1928{
1929 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1930 const struct ipv6hdr *key_iph = outer_iph;
5e5d6fed 1931 struct flow_keys *_flkeys = flkeys;
23aebdac
JS
1932 const struct ipv6hdr *inner_iph;
1933 const struct icmp6hdr *icmph;
1934 struct ipv6hdr _inner_iph;
cea67a2d 1935 struct icmp6hdr _icmph;
23aebdac
JS
1936
1937 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1938 goto out;
1939
cea67a2d
ED
1940 icmph = skb_header_pointer(skb, skb_transport_offset(skb),
1941 sizeof(_icmph), &_icmph);
1942 if (!icmph)
1943 goto out;
1944
23aebdac
JS
1945 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1946 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1947 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1948 icmph->icmp6_type != ICMPV6_PARAMPROB)
1949 goto out;
1950
1951 inner_iph = skb_header_pointer(skb,
1952 skb_transport_offset(skb) + sizeof(*icmph),
1953 sizeof(_inner_iph), &_inner_iph);
1954 if (!inner_iph)
1955 goto out;
1956
1957 key_iph = inner_iph;
5e5d6fed 1958 _flkeys = NULL;
23aebdac 1959out:
5e5d6fed
RP
1960 if (_flkeys) {
1961 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
1962 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
1963 keys->tags.flow_label = _flkeys->tags.flow_label;
1964 keys->basic.ip_proto = _flkeys->basic.ip_proto;
1965 } else {
1966 keys->addrs.v6addrs.src = key_iph->saddr;
1967 keys->addrs.v6addrs.dst = key_iph->daddr;
1968 keys->tags.flow_label = ip6_flowinfo(key_iph);
1969 keys->basic.ip_proto = key_iph->nexthdr;
1970 }
23aebdac
JS
1971}
1972
1973/* if skb is set it will be used and fl6 can be NULL */
b4bac172
DA
1974u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1975 const struct sk_buff *skb, struct flow_keys *flkeys)
23aebdac
JS
1976{
1977 struct flow_keys hash_keys;
9a2a537a 1978 u32 mhash;
23aebdac 1979
bbfa047a 1980 switch (ip6_multipath_hash_policy(net)) {
b4bac172
DA
1981 case 0:
1982 memset(&hash_keys, 0, sizeof(hash_keys));
1983 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1984 if (skb) {
1985 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
1986 } else {
1987 hash_keys.addrs.v6addrs.src = fl6->saddr;
1988 hash_keys.addrs.v6addrs.dst = fl6->daddr;
1989 hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
1990 hash_keys.basic.ip_proto = fl6->flowi6_proto;
1991 }
1992 break;
1993 case 1:
1994 if (skb) {
1995 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
1996 struct flow_keys keys;
1997
1998 /* short-circuit if we already have L4 hash present */
1999 if (skb->l4_hash)
2000 return skb_get_hash_raw(skb) >> 1;
2001
2002 memset(&hash_keys, 0, sizeof(hash_keys));
2003
2004 if (!flkeys) {
2005 skb_flow_dissect_flow_keys(skb, &keys, flag);
2006 flkeys = &keys;
2007 }
2008 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2009 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2010 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2011 hash_keys.ports.src = flkeys->ports.src;
2012 hash_keys.ports.dst = flkeys->ports.dst;
2013 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2014 } else {
2015 memset(&hash_keys, 0, sizeof(hash_keys));
2016 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2017 hash_keys.addrs.v6addrs.src = fl6->saddr;
2018 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2019 hash_keys.ports.src = fl6->fl6_sport;
2020 hash_keys.ports.dst = fl6->fl6_dport;
2021 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2022 }
2023 break;
23aebdac 2024 }
9a2a537a 2025 mhash = flow_hash_from_keys(&hash_keys);
23aebdac 2026
9a2a537a 2027 return mhash >> 1;
23aebdac
JS
2028}
2029
c71099ac
TG
2030void ip6_route_input(struct sk_buff *skb)
2031{
b71d1d42 2032 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 2033 struct net *net = dev_net(skb->dev);
adaa70bb 2034 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 2035 struct ip_tunnel_info *tun_info;
4c9483b2 2036 struct flowi6 fl6 = {
e0d56fdd 2037 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
2038 .daddr = iph->daddr,
2039 .saddr = iph->saddr,
6502ca52 2040 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
2041 .flowi6_mark = skb->mark,
2042 .flowi6_proto = iph->nexthdr,
c71099ac 2043 };
5e5d6fed 2044 struct flow_keys *flkeys = NULL, _flkeys;
adaa70bb 2045
904af04d 2046 tun_info = skb_tunnel_info(skb);
46fa062a 2047 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 2048 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
5e5d6fed
RP
2049
2050 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2051 flkeys = &_flkeys;
2052
23aebdac 2053 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
b4bac172 2054 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
06e9d040 2055 skb_dst_drop(skb);
b75cc8f9
DA
2056 skb_dst_set(skb,
2057 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
c71099ac
TG
2058}
2059
b75cc8f9
DA
2060static struct rt6_info *ip6_pol_route_output(struct net *net,
2061 struct fib6_table *table,
2062 struct flowi6 *fl6,
2063 const struct sk_buff *skb,
2064 int flags)
1da177e4 2065{
b75cc8f9 2066 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
c71099ac
TG
2067}
2068
6f21c96a
PA
2069struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2070 struct flowi6 *fl6, int flags)
c71099ac 2071{
d46a9d67 2072 bool any_src;
c71099ac 2073
4c1feac5
DA
2074 if (rt6_need_strict(&fl6->daddr)) {
2075 struct dst_entry *dst;
2076
2077 dst = l3mdev_link_scope_lookup(net, fl6);
2078 if (dst)
2079 return dst;
2080 }
ca254490 2081
1fb9489b 2082 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 2083
d46a9d67 2084 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 2085 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 2086 (fl6->flowi6_oif && any_src))
77d16f45 2087 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 2088
d46a9d67 2089 if (!any_src)
adaa70bb 2090 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
2091 else if (sk)
2092 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 2093
b75cc8f9 2094 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
1da177e4 2095}
6f21c96a 2096EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 2097
2774c131 2098struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2099{
5c1e6aa3 2100 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 2101 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
2102 struct dst_entry *new = NULL;
2103
1dbe3252 2104 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 2105 DST_OBSOLETE_DEAD, 0);
14e50e57 2106 if (rt) {
0a1f5962 2107 rt6_info_init(rt);
81eb8447 2108 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 2109
0a1f5962 2110 new = &rt->dst;
14e50e57 2111 new->__use = 1;
352e512c 2112 new->input = dst_discard;
ede2059d 2113 new->output = dst_discard_out;
14e50e57 2114
0a1f5962 2115 dst_copy_metrics(new, &ort->dst);
14e50e57 2116
1dbe3252 2117 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 2118 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 2119 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
2120
2121 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2122#ifdef CONFIG_IPV6_SUBTREES
2123 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2124#endif
14e50e57
DM
2125 }
2126
69ead7af
DM
2127 dst_release(dst_orig);
2128 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 2129}
14e50e57 2130
1da177e4
LT
2131/*
2132 * Destination cache support functions
2133 */
2134
8d1c802b 2135static bool fib6_check(struct fib6_info *f6i, u32 cookie)
93531c67
DA
2136{
2137 u32 rt_cookie = 0;
2138
8ae86971 2139 if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
93531c67
DA
2140 return false;
2141
2142 if (fib6_check_expired(f6i))
2143 return false;
2144
2145 return true;
2146}
2147
a68886a6
DA
2148static struct dst_entry *rt6_check(struct rt6_info *rt,
2149 struct fib6_info *from,
2150 u32 cookie)
3da59bd9 2151{
36143645 2152 u32 rt_cookie = 0;
c5cff856 2153
a68886a6 2154 if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
93531c67 2155 rt_cookie != cookie)
3da59bd9
MKL
2156 return NULL;
2157
2158 if (rt6_check_expired(rt))
2159 return NULL;
2160
2161 return &rt->dst;
2162}
2163
a68886a6
DA
2164static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2165 struct fib6_info *from,
2166 u32 cookie)
3da59bd9 2167{
5973fb1e
MKL
2168 if (!__rt6_check_expired(rt) &&
2169 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
a68886a6 2170 fib6_check(from, cookie))
3da59bd9
MKL
2171 return &rt->dst;
2172 else
2173 return NULL;
2174}
2175
1da177e4
LT
2176static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2177{
a87b7dc9 2178 struct dst_entry *dst_ret;
a68886a6 2179 struct fib6_info *from;
1da177e4
LT
2180 struct rt6_info *rt;
2181
a87b7dc9
DA
2182 rt = container_of(dst, struct rt6_info, dst);
2183
2184 rcu_read_lock();
1da177e4 2185
6f3118b5
ND
2186 /* All IPV6 dsts are created with ->obsolete set to the value
2187 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2188 * into this function always.
2189 */
e3bc10bd 2190
a68886a6
DA
2191 from = rcu_dereference(rt->from);
2192
2193 if (from && (rt->rt6i_flags & RTF_PCPU ||
2194 unlikely(!list_empty(&rt->rt6i_uncached))))
2195 dst_ret = rt6_dst_from_check(rt, from, cookie);
3da59bd9 2196 else
a68886a6 2197 dst_ret = rt6_check(rt, from, cookie);
a87b7dc9
DA
2198
2199 rcu_read_unlock();
2200
2201 return dst_ret;
1da177e4
LT
2202}
2203
2204static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2205{
2206 struct rt6_info *rt = (struct rt6_info *) dst;
2207
2208 if (rt) {
54c1a859 2209 if (rt->rt6i_flags & RTF_CACHE) {
c3c14da0 2210 rcu_read_lock();
54c1a859 2211 if (rt6_check_expired(rt)) {
93531c67 2212 rt6_remove_exception_rt(rt);
54c1a859
YH
2213 dst = NULL;
2214 }
c3c14da0 2215 rcu_read_unlock();
54c1a859 2216 } else {
1da177e4 2217 dst_release(dst);
54c1a859
YH
2218 dst = NULL;
2219 }
1da177e4 2220 }
54c1a859 2221 return dst;
1da177e4
LT
2222}
2223
2224static void ip6_link_failure(struct sk_buff *skb)
2225{
2226 struct rt6_info *rt;
2227
3ffe533c 2228 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2229
adf30907 2230 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2231 if (rt) {
8a14e46f 2232 rcu_read_lock();
1eb4f758 2233 if (rt->rt6i_flags & RTF_CACHE) {
ad65a2f0 2234 if (dst_hold_safe(&rt->dst))
93531c67 2235 rt6_remove_exception_rt(rt);
a68886a6
DA
2236 } else {
2237 struct fib6_info *from;
c5cff856
WW
2238 struct fib6_node *fn;
2239
a68886a6
DA
2240 from = rcu_dereference(rt->from);
2241 if (from) {
2242 fn = rcu_dereference(from->fib6_node);
2243 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2244 fn->fn_sernum = -1;
2245 }
1eb4f758 2246 }
8a14e46f 2247 rcu_read_unlock();
1da177e4
LT
2248 }
2249}
2250
6a3e030f
DA
2251static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2252{
a68886a6
DA
2253 if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2254 struct fib6_info *from;
2255
2256 rcu_read_lock();
2257 from = rcu_dereference(rt0->from);
2258 if (from)
2259 rt0->dst.expires = from->expires;
2260 rcu_read_unlock();
2261 }
6a3e030f
DA
2262
2263 dst_set_expires(&rt0->dst, timeout);
2264 rt0->rt6i_flags |= RTF_EXPIRES;
2265}
2266
45e4fd26
MKL
2267static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2268{
2269 struct net *net = dev_net(rt->dst.dev);
2270
d4ead6b3 2271 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
45e4fd26 2272 rt->rt6i_flags |= RTF_MODIFIED;
45e4fd26
MKL
2273 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2274}
2275
0d3f6d29
MKL
2276static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2277{
a68886a6
DA
2278 bool from_set;
2279
2280 rcu_read_lock();
2281 from_set = !!rcu_dereference(rt->from);
2282 rcu_read_unlock();
2283
0d3f6d29 2284 return !(rt->rt6i_flags & RTF_CACHE) &&
a68886a6 2285 (rt->rt6i_flags & RTF_PCPU || from_set);
0d3f6d29
MKL
2286}
2287
45e4fd26
MKL
2288static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2289 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2290{
0dec879f 2291 const struct in6_addr *daddr, *saddr;
67ba4152 2292 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2293
45e4fd26
MKL
2294 if (rt6->rt6i_flags & RTF_LOCAL)
2295 return;
81aded24 2296
19bda36c
XL
2297 if (dst_metric_locked(dst, RTAX_MTU))
2298 return;
2299
0dec879f
JA
2300 if (iph) {
2301 daddr = &iph->daddr;
2302 saddr = &iph->saddr;
2303 } else if (sk) {
2304 daddr = &sk->sk_v6_daddr;
2305 saddr = &inet6_sk(sk)->saddr;
2306 } else {
2307 daddr = NULL;
2308 saddr = NULL;
2309 }
2310 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2311 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2312 if (mtu >= dst_mtu(dst))
2313 return;
9d289715 2314
0d3f6d29 2315 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2316 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2317 /* update rt6_ex->stamp for cache */
2318 if (rt6->rt6i_flags & RTF_CACHE)
2319 rt6_update_exception_stamp_rt(rt6);
0dec879f 2320 } else if (daddr) {
a68886a6 2321 struct fib6_info *from;
45e4fd26
MKL
2322 struct rt6_info *nrt6;
2323
4d85cd0c 2324 rcu_read_lock();
a68886a6
DA
2325 from = rcu_dereference(rt6->from);
2326 nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
45e4fd26
MKL
2327 if (nrt6) {
2328 rt6_do_update_pmtu(nrt6, mtu);
a68886a6 2329 if (rt6_insert_exception(nrt6, from))
2b760fcf 2330 dst_release_immediate(&nrt6->dst);
45e4fd26 2331 }
a68886a6 2332 rcu_read_unlock();
1da177e4
LT
2333 }
2334}
2335
45e4fd26
MKL
2336static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2337 struct sk_buff *skb, u32 mtu)
2338{
2339 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2340}
2341
42ae66c8 2342void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2343 int oif, u32 mark, kuid_t uid)
81aded24
DM
2344{
2345 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2346 struct dst_entry *dst;
2347 struct flowi6 fl6;
2348
2349 memset(&fl6, 0, sizeof(fl6));
2350 fl6.flowi6_oif = oif;
1b3c61dc 2351 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
2352 fl6.daddr = iph->daddr;
2353 fl6.saddr = iph->saddr;
6502ca52 2354 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2355 fl6.flowi6_uid = uid;
81aded24
DM
2356
2357 dst = ip6_route_output(net, NULL, &fl6);
2358 if (!dst->error)
45e4fd26 2359 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2360 dst_release(dst);
2361}
2362EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2363
2364void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2365{
33c162a9
MKL
2366 struct dst_entry *dst;
2367
81aded24 2368 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 2369 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2370
2371 dst = __sk_dst_get(sk);
2372 if (!dst || !dst->obsolete ||
2373 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2374 return;
2375
2376 bh_lock_sock(sk);
2377 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2378 ip6_datagram_dst_update(sk, false);
2379 bh_unlock_sock(sk);
81aded24
DM
2380}
2381EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2382
7d6850f7
AK
2383void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2384 const struct flowi6 *fl6)
2385{
2386#ifdef CONFIG_IPV6_SUBTREES
2387 struct ipv6_pinfo *np = inet6_sk(sk);
2388#endif
2389
2390 ip6_dst_store(sk, dst,
2391 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2392 &sk->sk_v6_daddr : NULL,
2393#ifdef CONFIG_IPV6_SUBTREES
2394 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2395 &np->saddr :
2396#endif
2397 NULL);
2398}
2399
b55b76b2
DJ
2400/* Handle redirects */
2401struct ip6rd_flowi {
2402 struct flowi6 fl6;
2403 struct in6_addr gateway;
2404};
2405
2406static struct rt6_info *__ip6_route_redirect(struct net *net,
2407 struct fib6_table *table,
2408 struct flowi6 *fl6,
b75cc8f9 2409 const struct sk_buff *skb,
b55b76b2
DJ
2410 int flags)
2411{
2412 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
23fb93a4 2413 struct rt6_info *ret = NULL, *rt_cache;
8d1c802b 2414 struct fib6_info *rt;
b55b76b2
DJ
2415 struct fib6_node *fn;
2416
2417 /* Get the "current" route for this destination and
67c408cf 2418 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2419 *
2420 * RFC 4861 specifies that redirects should only be
2421 * accepted if they come from the nexthop to the target.
2422 * Due to the way the routes are chosen, this notion
2423 * is a bit fuzzy and one might need to check all possible
2424 * routes.
2425 */
2426
66f5d6ce 2427 rcu_read_lock();
b55b76b2
DJ
2428 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2429restart:
66f5d6ce 2430 for_each_fib6_node_rt_rcu(fn) {
5e670d84 2431 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c 2432 continue;
14895687 2433 if (fib6_check_expired(rt))
b55b76b2 2434 continue;
93c2fb25 2435 if (rt->fib6_flags & RTF_REJECT)
b55b76b2 2436 break;
93c2fb25 2437 if (!(rt->fib6_flags & RTF_GATEWAY))
b55b76b2 2438 continue;
5e670d84 2439 if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
b55b76b2 2440 continue;
2b760fcf
WW
2441 /* rt_cache's gateway might be different from its 'parent'
2442 * in the case of an ip redirect.
2443 * So we keep searching in the exception table if the gateway
2444 * is different.
2445 */
5e670d84 2446 if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
2b760fcf
WW
2447 rt_cache = rt6_find_cached_rt(rt,
2448 &fl6->daddr,
2449 &fl6->saddr);
2450 if (rt_cache &&
2451 ipv6_addr_equal(&rdfl->gateway,
2452 &rt_cache->rt6i_gateway)) {
23fb93a4 2453 ret = rt_cache;
2b760fcf
WW
2454 break;
2455 }
b55b76b2 2456 continue;
2b760fcf 2457 }
b55b76b2
DJ
2458 break;
2459 }
2460
2461 if (!rt)
421842ed 2462 rt = net->ipv6.fib6_null_entry;
93c2fb25 2463 else if (rt->fib6_flags & RTF_REJECT) {
23fb93a4 2464 ret = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2465 goto out;
2466 }
2467
421842ed 2468 if (rt == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
2469 fn = fib6_backtrack(fn, &fl6->saddr);
2470 if (fn)
2471 goto restart;
b55b76b2 2472 }
a3c00e46 2473
b0a1ba59 2474out:
23fb93a4
DA
2475 if (ret)
2476 dst_hold(&ret->dst);
2477 else
2478 ret = ip6_create_rt_rcu(rt);
b55b76b2 2479
66f5d6ce 2480 rcu_read_unlock();
b55b76b2 2481
23fb93a4
DA
2482 trace_fib6_table_lookup(net, ret, table, fl6);
2483 return ret;
b55b76b2
DJ
2484};
2485
2486static struct dst_entry *ip6_route_redirect(struct net *net,
b75cc8f9
DA
2487 const struct flowi6 *fl6,
2488 const struct sk_buff *skb,
2489 const struct in6_addr *gateway)
b55b76b2
DJ
2490{
2491 int flags = RT6_LOOKUP_F_HAS_SADDR;
2492 struct ip6rd_flowi rdfl;
2493
2494 rdfl.fl6 = *fl6;
2495 rdfl.gateway = *gateway;
2496
b75cc8f9 2497 return fib6_rule_lookup(net, &rdfl.fl6, skb,
b55b76b2
DJ
2498 flags, __ip6_route_redirect);
2499}
2500
e2d118a1
LC
2501void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2502 kuid_t uid)
3a5ad2ee
DM
2503{
2504 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2505 struct dst_entry *dst;
2506 struct flowi6 fl6;
2507
2508 memset(&fl6, 0, sizeof(fl6));
e374c618 2509 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
2510 fl6.flowi6_oif = oif;
2511 fl6.flowi6_mark = mark;
3a5ad2ee
DM
2512 fl6.daddr = iph->daddr;
2513 fl6.saddr = iph->saddr;
6502ca52 2514 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2515 fl6.flowi6_uid = uid;
3a5ad2ee 2516
b75cc8f9 2517 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
b55b76b2 2518 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2519 dst_release(dst);
2520}
2521EXPORT_SYMBOL_GPL(ip6_redirect);
2522
c92a59ec
DJ
2523void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2524 u32 mark)
2525{
2526 const struct ipv6hdr *iph = ipv6_hdr(skb);
2527 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2528 struct dst_entry *dst;
2529 struct flowi6 fl6;
2530
2531 memset(&fl6, 0, sizeof(fl6));
e374c618 2532 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
2533 fl6.flowi6_oif = oif;
2534 fl6.flowi6_mark = mark;
c92a59ec
DJ
2535 fl6.daddr = msg->dest;
2536 fl6.saddr = iph->daddr;
e2d118a1 2537 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 2538
b75cc8f9 2539 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
b55b76b2 2540 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2541 dst_release(dst);
2542}
2543
3a5ad2ee
DM
2544void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2545{
e2d118a1
LC
2546 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2547 sk->sk_uid);
3a5ad2ee
DM
2548}
2549EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2550
0dbaee3b 2551static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2552{
0dbaee3b
DM
2553 struct net_device *dev = dst->dev;
2554 unsigned int mtu = dst_mtu(dst);
2555 struct net *net = dev_net(dev);
2556
1da177e4
LT
2557 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2558
5578689a
DL
2559 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2560 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2561
2562 /*
1ab1457c
YH
2563 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2564 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2565 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2566 * rely only on pmtu discovery"
2567 */
2568 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2569 mtu = IPV6_MAXPLEN;
2570 return mtu;
2571}
2572
ebb762f2 2573static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2574{
d33e4553 2575 struct inet6_dev *idev;
d4ead6b3 2576 unsigned int mtu;
4b32b5ad
MKL
2577
2578 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2579 if (mtu)
30f78d8e 2580 goto out;
618f9bc7
SK
2581
2582 mtu = IPV6_MIN_MTU;
d33e4553
DM
2583
2584 rcu_read_lock();
2585 idev = __in6_dev_get(dst->dev);
2586 if (idev)
2587 mtu = idev->cnf.mtu6;
2588 rcu_read_unlock();
2589
30f78d8e 2590out:
14972cbd
RP
2591 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2592
2593 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2594}
2595
3b00944c 2596struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2597 struct flowi6 *fl6)
1da177e4 2598{
87a11578 2599 struct dst_entry *dst;
1da177e4
LT
2600 struct rt6_info *rt;
2601 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2602 struct net *net = dev_net(dev);
1da177e4 2603
38308473 2604 if (unlikely(!idev))
122bdf67 2605 return ERR_PTR(-ENODEV);
1da177e4 2606
ad706862 2607 rt = ip6_dst_alloc(net, dev, 0);
38308473 2608 if (unlikely(!rt)) {
1da177e4 2609 in6_dev_put(idev);
87a11578 2610 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2611 goto out;
2612 }
2613
8e2ec639 2614 rt->dst.flags |= DST_HOST;
588753f1 2615 rt->dst.input = ip6_input;
8e2ec639 2616 rt->dst.output = ip6_output;
550bab42 2617 rt->rt6i_gateway = fl6->daddr;
87a11578 2618 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2619 rt->rt6i_dst.plen = 128;
2620 rt->rt6i_idev = idev;
14edd87d 2621 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2622
4c981e28 2623 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2624 * do proper release of the net_device
2625 */
2626 rt6_uncached_list_add(rt);
81eb8447 2627 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2628
87a11578
DM
2629 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2630
1da177e4 2631out:
87a11578 2632 return dst;
1da177e4
LT
2633}
2634
569d3645 2635static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2636{
86393e52 2637 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2638 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2639 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2640 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2641 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2642 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2643 int entries;
7019b78e 2644
fc66f95c 2645 entries = dst_entries_get_fast(ops);
49a18d86 2646 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2647 entries <= rt_max_size)
1da177e4
LT
2648 goto out;
2649
6891a346 2650 net->ipv6.ip6_rt_gc_expire++;
14956643 2651 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2652 entries = dst_entries_get_slow(ops);
2653 if (entries < ops->gc_thresh)
7019b78e 2654 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2655out:
7019b78e 2656 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2657 return entries > rt_max_size;
1da177e4
LT
2658}
2659
8d1c802b 2660static int ip6_convert_metrics(struct net *net, struct fib6_info *rt,
d4ead6b3 2661 struct fib6_config *cfg)
e715b6d3 2662{
263243d6 2663 struct dst_metrics *p;
e715b6d3 2664
263243d6
ED
2665 if (!cfg->fc_mx)
2666 return 0;
ea697639 2667
263243d6
ED
2668 p = kzalloc(sizeof(*rt->fib6_metrics), GFP_KERNEL);
2669 if (unlikely(!p))
2670 return -ENOMEM;
e715b6d3 2671
263243d6
ED
2672 refcount_set(&p->refcnt, 1);
2673 rt->fib6_metrics = p;
e715b6d3 2674
263243d6 2675 return ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, p->metrics);
e715b6d3 2676}
1da177e4 2677
8c14586f
DA
2678static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2679 struct fib6_config *cfg,
f4797b33
DA
2680 const struct in6_addr *gw_addr,
2681 u32 tbid, int flags)
8c14586f
DA
2682{
2683 struct flowi6 fl6 = {
2684 .flowi6_oif = cfg->fc_ifindex,
2685 .daddr = *gw_addr,
2686 .saddr = cfg->fc_prefsrc,
2687 };
2688 struct fib6_table *table;
2689 struct rt6_info *rt;
8c14586f 2690
f4797b33 2691 table = fib6_get_table(net, tbid);
8c14586f
DA
2692 if (!table)
2693 return NULL;
2694
2695 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2696 flags |= RT6_LOOKUP_F_HAS_SADDR;
2697
f4797b33 2698 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
b75cc8f9 2699 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
8c14586f
DA
2700
2701 /* if table lookup failed, fall back to full lookup */
2702 if (rt == net->ipv6.ip6_null_entry) {
2703 ip6_rt_put(rt);
2704 rt = NULL;
2705 }
2706
2707 return rt;
2708}
2709
fc1e64e1
DA
2710static int ip6_route_check_nh_onlink(struct net *net,
2711 struct fib6_config *cfg,
9fbb704c 2712 const struct net_device *dev,
fc1e64e1
DA
2713 struct netlink_ext_ack *extack)
2714{
44750f84 2715 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
fc1e64e1
DA
2716 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2717 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2718 struct rt6_info *grt;
2719 int err;
2720
2721 err = 0;
2722 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2723 if (grt) {
58e354c0
DA
2724 if (!grt->dst.error &&
2725 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
44750f84
DA
2726 NL_SET_ERR_MSG(extack,
2727 "Nexthop has invalid gateway or device mismatch");
fc1e64e1
DA
2728 err = -EINVAL;
2729 }
2730
2731 ip6_rt_put(grt);
2732 }
2733
2734 return err;
2735}
2736
1edce99f
DA
2737static int ip6_route_check_nh(struct net *net,
2738 struct fib6_config *cfg,
2739 struct net_device **_dev,
2740 struct inet6_dev **idev)
2741{
2742 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2743 struct net_device *dev = _dev ? *_dev : NULL;
2744 struct rt6_info *grt = NULL;
2745 int err = -EHOSTUNREACH;
2746
2747 if (cfg->fc_table) {
f4797b33
DA
2748 int flags = RT6_LOOKUP_F_IFACE;
2749
2750 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2751 cfg->fc_table, flags);
1edce99f
DA
2752 if (grt) {
2753 if (grt->rt6i_flags & RTF_GATEWAY ||
2754 (dev && dev != grt->dst.dev)) {
2755 ip6_rt_put(grt);
2756 grt = NULL;
2757 }
2758 }
2759 }
2760
2761 if (!grt)
b75cc8f9 2762 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
1edce99f
DA
2763
2764 if (!grt)
2765 goto out;
2766
2767 if (dev) {
2768 if (dev != grt->dst.dev) {
2769 ip6_rt_put(grt);
2770 goto out;
2771 }
2772 } else {
2773 *_dev = dev = grt->dst.dev;
2774 *idev = grt->rt6i_idev;
2775 dev_hold(dev);
2776 in6_dev_hold(grt->rt6i_idev);
2777 }
2778
2779 if (!(grt->rt6i_flags & RTF_GATEWAY))
2780 err = 0;
2781
2782 ip6_rt_put(grt);
2783
2784out:
2785 return err;
2786}
2787
9fbb704c
DA
2788static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2789 struct net_device **_dev, struct inet6_dev **idev,
2790 struct netlink_ext_ack *extack)
2791{
2792 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2793 int gwa_type = ipv6_addr_type(gw_addr);
232378e8 2794 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
9fbb704c 2795 const struct net_device *dev = *_dev;
232378e8 2796 bool need_addr_check = !dev;
9fbb704c
DA
2797 int err = -EINVAL;
2798
2799 /* if gw_addr is local we will fail to detect this in case
2800 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2801 * will return already-added prefix route via interface that
2802 * prefix route was assigned to, which might be non-loopback.
2803 */
232378e8
DA
2804 if (dev &&
2805 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2806 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
9fbb704c
DA
2807 goto out;
2808 }
2809
2810 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2811 /* IPv6 strictly inhibits using not link-local
2812 * addresses as nexthop address.
2813 * Otherwise, router will not able to send redirects.
2814 * It is very good, but in some (rare!) circumstances
2815 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2816 * some exceptions. --ANK
2817 * We allow IPv4-mapped nexthops to support RFC4798-type
2818 * addressing
2819 */
2820 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2821 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2822 goto out;
2823 }
2824
2825 if (cfg->fc_flags & RTNH_F_ONLINK)
2826 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2827 else
2828 err = ip6_route_check_nh(net, cfg, _dev, idev);
2829
2830 if (err)
2831 goto out;
2832 }
2833
2834 /* reload in case device was changed */
2835 dev = *_dev;
2836
2837 err = -EINVAL;
2838 if (!dev) {
2839 NL_SET_ERR_MSG(extack, "Egress device not specified");
2840 goto out;
2841 } else if (dev->flags & IFF_LOOPBACK) {
2842 NL_SET_ERR_MSG(extack,
2843 "Egress device can not be loopback device for this route");
2844 goto out;
2845 }
232378e8
DA
2846
2847 /* if we did not check gw_addr above, do so now that the
2848 * egress device has been resolved.
2849 */
2850 if (need_addr_check &&
2851 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2852 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2853 goto out;
2854 }
2855
9fbb704c
DA
2856 err = 0;
2857out:
2858 return err;
2859}
2860
8d1c802b 2861static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
acb54e3c 2862 gfp_t gfp_flags,
333c4301 2863 struct netlink_ext_ack *extack)
1da177e4 2864{
5578689a 2865 struct net *net = cfg->fc_nlinfo.nl_net;
8d1c802b 2866 struct fib6_info *rt = NULL;
1da177e4
LT
2867 struct net_device *dev = NULL;
2868 struct inet6_dev *idev = NULL;
c71099ac 2869 struct fib6_table *table;
1da177e4 2870 int addr_type;
8c5b83f0 2871 int err = -EINVAL;
1da177e4 2872
557c44be 2873 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
2874 if (cfg->fc_flags & RTF_PCPU) {
2875 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 2876 goto out;
d5d531cb 2877 }
557c44be 2878
2ea2352e
WW
2879 /* RTF_CACHE is an internal flag; can not be set by userspace */
2880 if (cfg->fc_flags & RTF_CACHE) {
2881 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2882 goto out;
2883 }
2884
e8478e80
DA
2885 if (cfg->fc_type > RTN_MAX) {
2886 NL_SET_ERR_MSG(extack, "Invalid route type");
2887 goto out;
2888 }
2889
d5d531cb
DA
2890 if (cfg->fc_dst_len > 128) {
2891 NL_SET_ERR_MSG(extack, "Invalid prefix length");
2892 goto out;
2893 }
2894 if (cfg->fc_src_len > 128) {
2895 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 2896 goto out;
d5d531cb 2897 }
1da177e4 2898#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
2899 if (cfg->fc_src_len) {
2900 NL_SET_ERR_MSG(extack,
2901 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 2902 goto out;
d5d531cb 2903 }
1da177e4 2904#endif
86872cb5 2905 if (cfg->fc_ifindex) {
1da177e4 2906 err = -ENODEV;
5578689a 2907 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
2908 if (!dev)
2909 goto out;
2910 idev = in6_dev_get(dev);
2911 if (!idev)
2912 goto out;
2913 }
2914
86872cb5
TG
2915 if (cfg->fc_metric == 0)
2916 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 2917
fc1e64e1
DA
2918 if (cfg->fc_flags & RTNH_F_ONLINK) {
2919 if (!dev) {
2920 NL_SET_ERR_MSG(extack,
2921 "Nexthop device required for onlink");
2922 err = -ENODEV;
2923 goto out;
2924 }
2925
2926 if (!(dev->flags & IFF_UP)) {
2927 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2928 err = -ENETDOWN;
2929 goto out;
2930 }
2931 }
2932
d71314b4 2933 err = -ENOBUFS;
38308473
DM
2934 if (cfg->fc_nlinfo.nlh &&
2935 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 2936 table = fib6_get_table(net, cfg->fc_table);
38308473 2937 if (!table) {
f3213831 2938 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
2939 table = fib6_new_table(net, cfg->fc_table);
2940 }
2941 } else {
2942 table = fib6_new_table(net, cfg->fc_table);
2943 }
38308473
DM
2944
2945 if (!table)
c71099ac 2946 goto out;
c71099ac 2947
93531c67
DA
2948 err = -ENOMEM;
2949 rt = fib6_info_alloc(gfp_flags);
2950 if (!rt)
1da177e4 2951 goto out;
93531c67
DA
2952
2953 if (cfg->fc_flags & RTF_ADDRCONF)
2954 rt->dst_nocount = true;
1da177e4 2955
d4ead6b3
DA
2956 err = ip6_convert_metrics(net, rt, cfg);
2957 if (err < 0)
2958 goto out;
2959
1716a961 2960 if (cfg->fc_flags & RTF_EXPIRES)
14895687 2961 fib6_set_expires(rt, jiffies +
1716a961
G
2962 clock_t_to_jiffies(cfg->fc_expires));
2963 else
14895687 2964 fib6_clean_expires(rt);
1da177e4 2965
86872cb5
TG
2966 if (cfg->fc_protocol == RTPROT_UNSPEC)
2967 cfg->fc_protocol = RTPROT_BOOT;
93c2fb25 2968 rt->fib6_protocol = cfg->fc_protocol;
86872cb5
TG
2969
2970 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4 2971
19e42e45
RP
2972 if (cfg->fc_encap) {
2973 struct lwtunnel_state *lwtstate;
2974
30357d7d 2975 err = lwtunnel_build_state(cfg->fc_encap_type,
127eb7cd 2976 cfg->fc_encap, AF_INET6, cfg,
9ae28727 2977 &lwtstate, extack);
19e42e45
RP
2978 if (err)
2979 goto out;
5e670d84 2980 rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
19e42e45
RP
2981 }
2982
93c2fb25
DA
2983 ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
2984 rt->fib6_dst.plen = cfg->fc_dst_len;
2985 if (rt->fib6_dst.plen == 128)
3b6761d1 2986 rt->dst_host = true;
e5fd387a 2987
1da177e4 2988#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
2989 ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
2990 rt->fib6_src.plen = cfg->fc_src_len;
1da177e4
LT
2991#endif
2992
93c2fb25 2993 rt->fib6_metric = cfg->fc_metric;
5e670d84 2994 rt->fib6_nh.nh_weight = 1;
1da177e4 2995
e8478e80
DA
2996 rt->fib6_type = cfg->fc_type;
2997
1da177e4
LT
2998 /* We cannot add true routes via loopback here,
2999 they would result in kernel looping; promote them to reject routes
3000 */
86872cb5 3001 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
3002 (dev && (dev->flags & IFF_LOOPBACK) &&
3003 !(addr_type & IPV6_ADDR_LOOPBACK) &&
3004 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 3005 /* hold loopback dev/idev if we haven't done so. */
5578689a 3006 if (dev != net->loopback_dev) {
1da177e4
LT
3007 if (dev) {
3008 dev_put(dev);
3009 in6_dev_put(idev);
3010 }
5578689a 3011 dev = net->loopback_dev;
1da177e4
LT
3012 dev_hold(dev);
3013 idev = in6_dev_get(dev);
3014 if (!idev) {
3015 err = -ENODEV;
3016 goto out;
3017 }
3018 }
93c2fb25 3019 rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP;
1da177e4
LT
3020 goto install_route;
3021 }
3022
86872cb5 3023 if (cfg->fc_flags & RTF_GATEWAY) {
9fbb704c
DA
3024 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3025 if (err)
48ed7b26 3026 goto out;
1da177e4 3027
93531c67 3028 rt->fib6_nh.nh_gw = cfg->fc_gateway;
1da177e4
LT
3029 }
3030
3031 err = -ENODEV;
38308473 3032 if (!dev)
1da177e4
LT
3033 goto out;
3034
428604fb
LB
3035 if (idev->cnf.disable_ipv6) {
3036 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3037 err = -EACCES;
3038 goto out;
3039 }
3040
955ec4cb
DA
3041 if (!(dev->flags & IFF_UP)) {
3042 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3043 err = -ENETDOWN;
3044 goto out;
3045 }
3046
c3968a85
DW
3047 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3048 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 3049 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
3050 err = -EINVAL;
3051 goto out;
3052 }
93c2fb25
DA
3053 rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3054 rt->fib6_prefsrc.plen = 128;
c3968a85 3055 } else
93c2fb25 3056 rt->fib6_prefsrc.plen = 0;
c3968a85 3057
93c2fb25 3058 rt->fib6_flags = cfg->fc_flags;
1da177e4
LT
3059
3060install_route:
93c2fb25 3061 if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
5609b80a 3062 !netif_carrier_ok(dev))
5e670d84
DA
3063 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3064 rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
93531c67 3065 rt->fib6_nh.nh_dev = dev;
93c2fb25 3066 rt->fib6_table = table;
63152fc0 3067
c346dca1 3068 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 3069
dcd1f572
DA
3070 if (idev)
3071 in6_dev_put(idev);
3072
8c5b83f0 3073 return rt;
6b9ea5a6
RP
3074out:
3075 if (dev)
3076 dev_put(dev);
3077 if (idev)
3078 in6_dev_put(idev);
6b9ea5a6 3079
93531c67 3080 fib6_info_release(rt);
8c5b83f0 3081 return ERR_PTR(err);
6b9ea5a6
RP
3082}
3083
acb54e3c
DA
3084int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3085 struct netlink_ext_ack *extack)
6b9ea5a6 3086{
8d1c802b 3087 struct fib6_info *rt;
6b9ea5a6
RP
3088 int err;
3089
acb54e3c 3090 rt = ip6_route_info_create(cfg, gfp_flags, extack);
d4ead6b3
DA
3091 if (IS_ERR(rt))
3092 return PTR_ERR(rt);
6b9ea5a6 3093
d4ead6b3 3094 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
93531c67 3095 fib6_info_release(rt);
6b9ea5a6 3096
1da177e4
LT
3097 return err;
3098}
3099
8d1c802b 3100static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
1da177e4 3101{
afb1d4b5 3102 struct net *net = info->nl_net;
c71099ac 3103 struct fib6_table *table;
afb1d4b5 3104 int err;
1da177e4 3105
421842ed 3106 if (rt == net->ipv6.fib6_null_entry) {
6825a26c
G
3107 err = -ENOENT;
3108 goto out;
3109 }
6c813a72 3110
93c2fb25 3111 table = rt->fib6_table;
66f5d6ce 3112 spin_lock_bh(&table->tb6_lock);
86872cb5 3113 err = fib6_del(rt, info);
66f5d6ce 3114 spin_unlock_bh(&table->tb6_lock);
1da177e4 3115
6825a26c 3116out:
93531c67 3117 fib6_info_release(rt);
1da177e4
LT
3118 return err;
3119}
3120
8d1c802b 3121int ip6_del_rt(struct net *net, struct fib6_info *rt)
e0a1ad73 3122{
afb1d4b5
DA
3123 struct nl_info info = { .nl_net = net };
3124
528c4ceb 3125 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
3126}
3127
8d1c802b 3128static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
0ae81335
DA
3129{
3130 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 3131 struct net *net = info->nl_net;
16a16cd3 3132 struct sk_buff *skb = NULL;
0ae81335 3133 struct fib6_table *table;
e3330039 3134 int err = -ENOENT;
0ae81335 3135
421842ed 3136 if (rt == net->ipv6.fib6_null_entry)
e3330039 3137 goto out_put;
93c2fb25 3138 table = rt->fib6_table;
66f5d6ce 3139 spin_lock_bh(&table->tb6_lock);
0ae81335 3140
93c2fb25 3141 if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
8d1c802b 3142 struct fib6_info *sibling, *next_sibling;
0ae81335 3143
16a16cd3
DA
3144 /* prefer to send a single notification with all hops */
3145 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3146 if (skb) {
3147 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3148
d4ead6b3 3149 if (rt6_fill_node(net, skb, rt, NULL,
16a16cd3
DA
3150 NULL, NULL, 0, RTM_DELROUTE,
3151 info->portid, seq, 0) < 0) {
3152 kfree_skb(skb);
3153 skb = NULL;
3154 } else
3155 info->skip_notify = 1;
3156 }
3157
0ae81335 3158 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25
DA
3159 &rt->fib6_siblings,
3160 fib6_siblings) {
0ae81335
DA
3161 err = fib6_del(sibling, info);
3162 if (err)
e3330039 3163 goto out_unlock;
0ae81335
DA
3164 }
3165 }
3166
3167 err = fib6_del(rt, info);
e3330039 3168out_unlock:
66f5d6ce 3169 spin_unlock_bh(&table->tb6_lock);
e3330039 3170out_put:
93531c67 3171 fib6_info_release(rt);
16a16cd3
DA
3172
3173 if (skb) {
e3330039 3174 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
3175 info->nlh, gfp_any());
3176 }
0ae81335
DA
3177 return err;
3178}
3179
23fb93a4
DA
3180static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3181{
3182 int rc = -ESRCH;
3183
3184 if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3185 goto out;
3186
3187 if (cfg->fc_flags & RTF_GATEWAY &&
3188 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3189 goto out;
3190 if (dst_hold_safe(&rt->dst))
3191 rc = rt6_remove_exception_rt(rt);
3192out:
3193 return rc;
3194}
3195
333c4301
DA
3196static int ip6_route_del(struct fib6_config *cfg,
3197 struct netlink_ext_ack *extack)
1da177e4 3198{
8d1c802b 3199 struct rt6_info *rt_cache;
c71099ac 3200 struct fib6_table *table;
8d1c802b 3201 struct fib6_info *rt;
1da177e4 3202 struct fib6_node *fn;
1da177e4
LT
3203 int err = -ESRCH;
3204
5578689a 3205 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
3206 if (!table) {
3207 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 3208 return err;
d5d531cb 3209 }
c71099ac 3210
66f5d6ce 3211 rcu_read_lock();
1da177e4 3212
c71099ac 3213 fn = fib6_locate(&table->tb6_root,
86872cb5 3214 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 3215 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 3216 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 3217
1da177e4 3218 if (fn) {
66f5d6ce 3219 for_each_fib6_node_rt_rcu(fn) {
2b760fcf 3220 if (cfg->fc_flags & RTF_CACHE) {
23fb93a4
DA
3221 int rc;
3222
2b760fcf
WW
3223 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3224 &cfg->fc_src);
23fb93a4
DA
3225 if (rt_cache) {
3226 rc = ip6_del_cached_rt(rt_cache, cfg);
3227 if (rc != -ESRCH)
3228 return rc;
3229 }
3230 continue;
2b760fcf 3231 }
86872cb5 3232 if (cfg->fc_ifindex &&
5e670d84
DA
3233 (!rt->fib6_nh.nh_dev ||
3234 rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
1da177e4 3235 continue;
86872cb5 3236 if (cfg->fc_flags & RTF_GATEWAY &&
5e670d84 3237 !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
1da177e4 3238 continue;
93c2fb25 3239 if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
1da177e4 3240 continue;
93c2fb25 3241 if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
c2ed1880 3242 continue;
93531c67 3243 fib6_info_hold(rt);
66f5d6ce 3244 rcu_read_unlock();
1da177e4 3245
0ae81335
DA
3246 /* if gateway was specified only delete the one hop */
3247 if (cfg->fc_flags & RTF_GATEWAY)
3248 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3249
3250 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3251 }
3252 }
66f5d6ce 3253 rcu_read_unlock();
1da177e4
LT
3254
3255 return err;
3256}
3257
6700c270 3258static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 3259{
a6279458 3260 struct netevent_redirect netevent;
e8599ff4 3261 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
3262 struct ndisc_options ndopts;
3263 struct inet6_dev *in6_dev;
3264 struct neighbour *neigh;
a68886a6 3265 struct fib6_info *from;
71bcdba0 3266 struct rd_msg *msg;
6e157b6a
DM
3267 int optlen, on_link;
3268 u8 *lladdr;
e8599ff4 3269
29a3cad5 3270 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 3271 optlen -= sizeof(*msg);
e8599ff4
DM
3272
3273 if (optlen < 0) {
6e157b6a 3274 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
3275 return;
3276 }
3277
71bcdba0 3278 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 3279
71bcdba0 3280 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 3281 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
3282 return;
3283 }
3284
6e157b6a 3285 on_link = 0;
71bcdba0 3286 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 3287 on_link = 1;
71bcdba0 3288 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 3289 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 3290 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
3291 return;
3292 }
3293
3294 in6_dev = __in6_dev_get(skb->dev);
3295 if (!in6_dev)
3296 return;
3297 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3298 return;
3299
3300 /* RFC2461 8.1:
3301 * The IP source address of the Redirect MUST be the same as the current
3302 * first-hop router for the specified ICMP Destination Address.
3303 */
3304
f997c55c 3305 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
3306 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3307 return;
3308 }
6e157b6a
DM
3309
3310 lladdr = NULL;
e8599ff4
DM
3311 if (ndopts.nd_opts_tgt_lladdr) {
3312 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3313 skb->dev);
3314 if (!lladdr) {
3315 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3316 return;
3317 }
3318 }
3319
6e157b6a 3320 rt = (struct rt6_info *) dst;
ec13ad1d 3321 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 3322 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 3323 return;
6e157b6a 3324 }
e8599ff4 3325
6e157b6a
DM
3326 /* Redirect received -> path was valid.
3327 * Look, redirects are sent only in response to data packets,
3328 * so that this nexthop apparently is reachable. --ANK
3329 */
0dec879f 3330 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 3331
71bcdba0 3332 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
3333 if (!neigh)
3334 return;
a6279458 3335
1da177e4
LT
3336 /*
3337 * We have finally decided to accept it.
3338 */
3339
f997c55c 3340 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3341 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3342 NEIGH_UPDATE_F_OVERRIDE|
3343 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3344 NEIGH_UPDATE_F_ISROUTER)),
3345 NDISC_REDIRECT, &ndopts);
1da177e4 3346
4d85cd0c 3347 rcu_read_lock();
a68886a6 3348 from = rcu_dereference(rt->from);
8a14e46f 3349 fib6_info_hold(from);
4d85cd0c 3350 rcu_read_unlock();
8a14e46f
DA
3351
3352 nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
38308473 3353 if (!nrt)
1da177e4
LT
3354 goto out;
3355
3356 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3357 if (on_link)
3358 nrt->rt6i_flags &= ~RTF_GATEWAY;
3359
4e3fd7a0 3360 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3361
2b760fcf
WW
3362 /* No need to remove rt from the exception table if rt is
3363 * a cached route because rt6_insert_exception() will
3364 * takes care of it
3365 */
8a14e46f 3366 if (rt6_insert_exception(nrt, from)) {
2b760fcf
WW
3367 dst_release_immediate(&nrt->dst);
3368 goto out;
3369 }
1da177e4 3370
d8d1f30b
CG
3371 netevent.old = &rt->dst;
3372 netevent.new = &nrt->dst;
71bcdba0 3373 netevent.daddr = &msg->dest;
60592833 3374 netevent.neigh = neigh;
8d71740c
TT
3375 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3376
1da177e4 3377out:
8a14e46f 3378 fib6_info_release(from);
e8599ff4 3379 neigh_release(neigh);
6e157b6a
DM
3380}
3381
70ceb4f5 3382#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 3383static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 3384 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3385 const struct in6_addr *gwaddr,
3386 struct net_device *dev)
70ceb4f5 3387{
830218c1
DA
3388 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3389 int ifindex = dev->ifindex;
70ceb4f5 3390 struct fib6_node *fn;
8d1c802b 3391 struct fib6_info *rt = NULL;
c71099ac
TG
3392 struct fib6_table *table;
3393
830218c1 3394 table = fib6_get_table(net, tb_id);
38308473 3395 if (!table)
c71099ac 3396 return NULL;
70ceb4f5 3397
66f5d6ce 3398 rcu_read_lock();
38fbeeee 3399 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3400 if (!fn)
3401 goto out;
3402
66f5d6ce 3403 for_each_fib6_node_rt_rcu(fn) {
5e670d84 3404 if (rt->fib6_nh.nh_dev->ifindex != ifindex)
70ceb4f5 3405 continue;
93c2fb25 3406 if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
70ceb4f5 3407 continue;
5e670d84 3408 if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
70ceb4f5 3409 continue;
8d1c802b 3410 fib6_info_hold(rt);
70ceb4f5
YH
3411 break;
3412 }
3413out:
66f5d6ce 3414 rcu_read_unlock();
70ceb4f5
YH
3415 return rt;
3416}
3417
8d1c802b 3418static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 3419 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3420 const struct in6_addr *gwaddr,
3421 struct net_device *dev,
95c96174 3422 unsigned int pref)
70ceb4f5 3423{
86872cb5 3424 struct fib6_config cfg = {
238fc7ea 3425 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3426 .fc_ifindex = dev->ifindex,
86872cb5
TG
3427 .fc_dst_len = prefixlen,
3428 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3429 RTF_UP | RTF_PREF(pref),
b91d5329 3430 .fc_protocol = RTPROT_RA,
e8478e80 3431 .fc_type = RTN_UNICAST,
15e47304 3432 .fc_nlinfo.portid = 0,
efa2cea0
DL
3433 .fc_nlinfo.nlh = NULL,
3434 .fc_nlinfo.nl_net = net,
86872cb5
TG
3435 };
3436
830218c1 3437 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3438 cfg.fc_dst = *prefix;
3439 cfg.fc_gateway = *gwaddr;
70ceb4f5 3440
e317da96
YH
3441 /* We should treat it as a default route if prefix length is 0. */
3442 if (!prefixlen)
86872cb5 3443 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3444
acb54e3c 3445 ip6_route_add(&cfg, GFP_ATOMIC, NULL);
70ceb4f5 3446
830218c1 3447 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3448}
3449#endif
3450
8d1c802b 3451struct fib6_info *rt6_get_dflt_router(struct net *net,
afb1d4b5
DA
3452 const struct in6_addr *addr,
3453 struct net_device *dev)
1ab1457c 3454{
830218c1 3455 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
8d1c802b 3456 struct fib6_info *rt;
c71099ac 3457 struct fib6_table *table;
1da177e4 3458
afb1d4b5 3459 table = fib6_get_table(net, tb_id);
38308473 3460 if (!table)
c71099ac 3461 return NULL;
1da177e4 3462
66f5d6ce
WW
3463 rcu_read_lock();
3464 for_each_fib6_node_rt_rcu(&table->tb6_root) {
5e670d84 3465 if (dev == rt->fib6_nh.nh_dev &&
93c2fb25 3466 ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
5e670d84 3467 ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
1da177e4
LT
3468 break;
3469 }
3470 if (rt)
8d1c802b 3471 fib6_info_hold(rt);
66f5d6ce 3472 rcu_read_unlock();
1da177e4
LT
3473 return rt;
3474}
3475
8d1c802b 3476struct fib6_info *rt6_add_dflt_router(struct net *net,
afb1d4b5 3477 const struct in6_addr *gwaddr,
ebacaaa0
YH
3478 struct net_device *dev,
3479 unsigned int pref)
1da177e4 3480{
86872cb5 3481 struct fib6_config cfg = {
ca254490 3482 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3483 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3484 .fc_ifindex = dev->ifindex,
3485 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3486 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3487 .fc_protocol = RTPROT_RA,
e8478e80 3488 .fc_type = RTN_UNICAST,
15e47304 3489 .fc_nlinfo.portid = 0,
5578689a 3490 .fc_nlinfo.nlh = NULL,
afb1d4b5 3491 .fc_nlinfo.nl_net = net,
86872cb5 3492 };
1da177e4 3493
4e3fd7a0 3494 cfg.fc_gateway = *gwaddr;
1da177e4 3495
acb54e3c 3496 if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
830218c1
DA
3497 struct fib6_table *table;
3498
3499 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3500 if (table)
3501 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3502 }
1da177e4 3503
afb1d4b5 3504 return rt6_get_dflt_router(net, gwaddr, dev);
1da177e4
LT
3505}
3506
afb1d4b5
DA
3507static void __rt6_purge_dflt_routers(struct net *net,
3508 struct fib6_table *table)
1da177e4 3509{
8d1c802b 3510 struct fib6_info *rt;
1da177e4
LT
3511
3512restart:
66f5d6ce
WW
3513 rcu_read_lock();
3514 for_each_fib6_node_rt_rcu(&table->tb6_root) {
dcd1f572
DA
3515 struct net_device *dev = fib6_info_nh_dev(rt);
3516 struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3517
93c2fb25 3518 if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
dcd1f572 3519 (!idev || idev->cnf.accept_ra != 2)) {
93531c67
DA
3520 fib6_info_hold(rt);
3521 rcu_read_unlock();
3522 ip6_del_rt(net, rt);
1da177e4
LT
3523 goto restart;
3524 }
3525 }
66f5d6ce 3526 rcu_read_unlock();
830218c1
DA
3527
3528 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3529}
3530
3531void rt6_purge_dflt_routers(struct net *net)
3532{
3533 struct fib6_table *table;
3534 struct hlist_head *head;
3535 unsigned int h;
3536
3537 rcu_read_lock();
3538
3539 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3540 head = &net->ipv6.fib_table_hash[h];
3541 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3542 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
afb1d4b5 3543 __rt6_purge_dflt_routers(net, table);
830218c1
DA
3544 }
3545 }
3546
3547 rcu_read_unlock();
1da177e4
LT
3548}
3549
5578689a
DL
3550static void rtmsg_to_fib6_config(struct net *net,
3551 struct in6_rtmsg *rtmsg,
86872cb5
TG
3552 struct fib6_config *cfg)
3553{
3554 memset(cfg, 0, sizeof(*cfg));
3555
ca254490
DA
3556 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3557 : RT6_TABLE_MAIN;
86872cb5
TG
3558 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3559 cfg->fc_metric = rtmsg->rtmsg_metric;
3560 cfg->fc_expires = rtmsg->rtmsg_info;
3561 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3562 cfg->fc_src_len = rtmsg->rtmsg_src_len;
3563 cfg->fc_flags = rtmsg->rtmsg_flags;
e8478e80 3564 cfg->fc_type = rtmsg->rtmsg_type;
86872cb5 3565
5578689a 3566 cfg->fc_nlinfo.nl_net = net;
f1243c2d 3567
4e3fd7a0
AD
3568 cfg->fc_dst = rtmsg->rtmsg_dst;
3569 cfg->fc_src = rtmsg->rtmsg_src;
3570 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
3571}
3572
5578689a 3573int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3574{
86872cb5 3575 struct fib6_config cfg;
1da177e4
LT
3576 struct in6_rtmsg rtmsg;
3577 int err;
3578
67ba4152 3579 switch (cmd) {
1da177e4
LT
3580 case SIOCADDRT: /* Add a route */
3581 case SIOCDELRT: /* Delete a route */
af31f412 3582 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3583 return -EPERM;
3584 err = copy_from_user(&rtmsg, arg,
3585 sizeof(struct in6_rtmsg));
3586 if (err)
3587 return -EFAULT;
86872cb5 3588
5578689a 3589 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3590
1da177e4
LT
3591 rtnl_lock();
3592 switch (cmd) {
3593 case SIOCADDRT:
acb54e3c 3594 err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
1da177e4
LT
3595 break;
3596 case SIOCDELRT:
333c4301 3597 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3598 break;
3599 default:
3600 err = -EINVAL;
3601 }
3602 rtnl_unlock();
3603
3604 return err;
3ff50b79 3605 }
1da177e4
LT
3606
3607 return -EINVAL;
3608}
3609
3610/*
3611 * Drop the packet on the floor
3612 */
3613
d5fdd6ba 3614static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3615{
612f09e8 3616 int type;
adf30907 3617 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3618 switch (ipstats_mib_noroutes) {
3619 case IPSTATS_MIB_INNOROUTES:
0660e03f 3620 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3621 if (type == IPV6_ADDR_ANY) {
bdb7cc64
SS
3622 IP6_INC_STATS(dev_net(dst->dev),
3623 __in6_dev_get_safely(skb->dev),
3bd653c8 3624 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3625 break;
3626 }
3627 /* FALLTHROUGH */
3628 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3629 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3630 ipstats_mib_noroutes);
612f09e8
YH
3631 break;
3632 }
3ffe533c 3633 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3634 kfree_skb(skb);
3635 return 0;
3636}
3637
9ce8ade0
TG
3638static int ip6_pkt_discard(struct sk_buff *skb)
3639{
612f09e8 3640 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3641}
3642
ede2059d 3643static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3644{
adf30907 3645 skb->dev = skb_dst(skb)->dev;
612f09e8 3646 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3647}
3648
9ce8ade0
TG
3649static int ip6_pkt_prohibit(struct sk_buff *skb)
3650{
612f09e8 3651 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3652}
3653
ede2059d 3654static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3655{
adf30907 3656 skb->dev = skb_dst(skb)->dev;
612f09e8 3657 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3658}
3659
1da177e4
LT
3660/*
3661 * Allocate a dst for local (unicast / anycast) address.
3662 */
3663
360a9887
DA
3664struct fib6_info *addrconf_f6i_alloc(struct net *net,
3665 struct inet6_dev *idev,
3666 const struct in6_addr *addr,
3667 bool anycast, gfp_t gfp_flags)
1da177e4 3668{
ca254490 3669 u32 tb_id;
4832c30d 3670 struct net_device *dev = idev->dev;
360a9887 3671 struct fib6_info *f6i;
5f02ce24 3672
360a9887
DA
3673 f6i = fib6_info_alloc(gfp_flags);
3674 if (!f6i)
1da177e4
LT
3675 return ERR_PTR(-ENOMEM);
3676
360a9887 3677 f6i->dst_nocount = true;
360a9887
DA
3678 f6i->dst_host = true;
3679 f6i->fib6_protocol = RTPROT_KERNEL;
3680 f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP;
e8478e80 3681 if (anycast) {
360a9887
DA
3682 f6i->fib6_type = RTN_ANYCAST;
3683 f6i->fib6_flags |= RTF_ANYCAST;
e8478e80 3684 } else {
360a9887
DA
3685 f6i->fib6_type = RTN_LOCAL;
3686 f6i->fib6_flags |= RTF_LOCAL;
e8478e80 3687 }
1da177e4 3688
360a9887 3689 f6i->fib6_nh.nh_gw = *addr;
93531c67 3690 dev_hold(dev);
360a9887
DA
3691 f6i->fib6_nh.nh_dev = dev;
3692 f6i->fib6_dst.addr = *addr;
3693 f6i->fib6_dst.plen = 128;
ca254490 3694 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
360a9887 3695 f6i->fib6_table = fib6_get_table(net, tb_id);
1da177e4 3696
360a9887 3697 return f6i;
1da177e4
LT
3698}
3699
c3968a85
DW
3700/* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* restrict to this device; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address being removed */
};
3706
8d1c802b 3707static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
c3968a85
DW
3708{
3709 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3710 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3711 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3712
5e670d84 3713 if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
421842ed 3714 rt != net->ipv6.fib6_null_entry &&
93c2fb25 3715 ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
60006a48 3716 spin_lock_bh(&rt6_exception_lock);
c3968a85 3717 /* remove prefsrc entry */
93c2fb25 3718 rt->fib6_prefsrc.plen = 0;
60006a48
WW
3719 /* need to update cache as well */
3720 rt6_exceptions_remove_prefsrc(rt);
3721 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3722 }
3723 return 0;
3724}
3725
3726void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3727{
3728 struct net *net = dev_net(ifp->idev->dev);
3729 struct arg_dev_net_ip adni = {
3730 .dev = ifp->idev->dev,
3731 .net = net,
3732 .addr = &ifp->addr,
3733 };
0c3584d5 3734 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3735}
3736
be7a010d 3737#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
be7a010d
DJ
3738
3739/* Remove routers and update dst entries when gateway turn into host. */
8d1c802b 3740static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
be7a010d
DJ
3741{
3742 struct in6_addr *gateway = (struct in6_addr *)arg;
3743
93c2fb25 3744 if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
5e670d84 3745 ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
be7a010d
DJ
3746 return -1;
3747 }
b16cb459
WW
3748
3749 /* Further clean up cached routes in exception table.
3750 * This is needed because cached route may have a different
3751 * gateway than its 'parent' in the case of an ip redirect.
3752 */
3753 rt6_exceptions_clean_tohost(rt, gateway);
3754
be7a010d
DJ
3755 return 0;
3756}
3757
3758void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3759{
3760 fib6_clean_all(net, fib6_clean_tohost, gateway);
3761}
3762
2127d95a
IS
/*
 * Argument for the fib6_ifup()/fib6_ifdown() walkers. Only one union
 * member is meaningful per walk: fib6_ifup() reads nh_flags,
 * fib6_ifdown() reads event.
 */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event is about */
	union {
		unsigned int nh_flags;	/* nexthop flags to clear (ifup) */
		unsigned long event;	/* NETDEV_* event (ifdown) */
	};
};
3770
8d1c802b 3771static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
d7dedee1 3772{
8d1c802b 3773 struct fib6_info *iter;
d7dedee1
IS
3774 struct fib6_node *fn;
3775
93c2fb25
DA
3776 fn = rcu_dereference_protected(rt->fib6_node,
3777 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3778 iter = rcu_dereference_protected(fn->leaf,
93c2fb25 3779 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3780 while (iter) {
93c2fb25 3781 if (iter->fib6_metric == rt->fib6_metric &&
d7dedee1
IS
3782 rt6_qualify_for_ecmp(iter))
3783 return iter;
8fb11a9a 3784 iter = rcu_dereference_protected(iter->fib6_next,
93c2fb25 3785 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1
IS
3786 }
3787
3788 return NULL;
3789}
3790
8d1c802b 3791static bool rt6_is_dead(const struct fib6_info *rt)
d7dedee1 3792{
5e670d84
DA
3793 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
3794 (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
dcd1f572 3795 fib6_ignore_linkdown(rt)))
d7dedee1
IS
3796 return true;
3797
3798 return false;
3799}
3800
8d1c802b 3801static int rt6_multipath_total_weight(const struct fib6_info *rt)
d7dedee1 3802{
8d1c802b 3803 struct fib6_info *iter;
d7dedee1
IS
3804 int total = 0;
3805
3806 if (!rt6_is_dead(rt))
5e670d84 3807 total += rt->fib6_nh.nh_weight;
d7dedee1 3808
93c2fb25 3809 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
d7dedee1 3810 if (!rt6_is_dead(iter))
5e670d84 3811 total += iter->fib6_nh.nh_weight;
d7dedee1
IS
3812 }
3813
3814 return total;
3815}
3816
8d1c802b 3817static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
d7dedee1
IS
3818{
3819 int upper_bound = -1;
3820
3821 if (!rt6_is_dead(rt)) {
5e670d84 3822 *weight += rt->fib6_nh.nh_weight;
d7dedee1
IS
3823 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3824 total) - 1;
3825 }
5e670d84 3826 atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
d7dedee1
IS
3827}
3828
8d1c802b 3829static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
d7dedee1 3830{
8d1c802b 3831 struct fib6_info *iter;
d7dedee1
IS
3832 int weight = 0;
3833
3834 rt6_upper_bound_set(rt, &weight, total);
3835
93c2fb25 3836 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
d7dedee1
IS
3837 rt6_upper_bound_set(iter, &weight, total);
3838}
3839
8d1c802b 3840void rt6_multipath_rebalance(struct fib6_info *rt)
d7dedee1 3841{
8d1c802b 3842 struct fib6_info *first;
d7dedee1
IS
3843 int total;
3844
3845 /* In case the entire multipath route was marked for flushing,
3846 * then there is no need to rebalance upon the removal of every
3847 * sibling route.
3848 */
93c2fb25 3849 if (!rt->fib6_nsiblings || rt->should_flush)
d7dedee1
IS
3850 return;
3851
3852 /* During lookup routes are evaluated in order, so we need to
3853 * make sure upper bounds are assigned from the first sibling
3854 * onwards.
3855 */
3856 first = rt6_multipath_first_sibling(rt);
3857 if (WARN_ON_ONCE(!first))
3858 return;
3859
3860 total = rt6_multipath_total_weight(first);
3861 rt6_multipath_upper_bound_set(first, total);
3862}
3863
8d1c802b 3864static int fib6_ifup(struct fib6_info *rt, void *p_arg)
2127d95a
IS
3865{
3866 const struct arg_netdev_event *arg = p_arg;
7aef6859 3867 struct net *net = dev_net(arg->dev);
2127d95a 3868
421842ed 3869 if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
5e670d84 3870 rt->fib6_nh.nh_flags &= ~arg->nh_flags;
7aef6859 3871 fib6_update_sernum_upto_root(net, rt);
d7dedee1 3872 rt6_multipath_rebalance(rt);
1de178ed 3873 }
2127d95a
IS
3874
3875 return 0;
3876}
3877
3878void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3879{
3880 struct arg_netdev_event arg = {
3881 .dev = dev,
6802f3ad
IS
3882 {
3883 .nh_flags = nh_flags,
3884 },
2127d95a
IS
3885 };
3886
3887 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3888 arg.nh_flags |= RTNH_F_LINKDOWN;
3889
3890 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3891}
3892
8d1c802b 3893static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
1de178ed
IS
3894 const struct net_device *dev)
3895{
8d1c802b 3896 struct fib6_info *iter;
1de178ed 3897
5e670d84 3898 if (rt->fib6_nh.nh_dev == dev)
1de178ed 3899 return true;
93c2fb25 3900 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84 3901 if (iter->fib6_nh.nh_dev == dev)
1de178ed
IS
3902 return true;
3903
3904 return false;
3905}
3906
8d1c802b 3907static void rt6_multipath_flush(struct fib6_info *rt)
1de178ed 3908{
8d1c802b 3909 struct fib6_info *iter;
1de178ed
IS
3910
3911 rt->should_flush = 1;
93c2fb25 3912 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
1de178ed
IS
3913 iter->should_flush = 1;
3914}
3915
8d1c802b 3916static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
1de178ed
IS
3917 const struct net_device *down_dev)
3918{
8d1c802b 3919 struct fib6_info *iter;
1de178ed
IS
3920 unsigned int dead = 0;
3921
5e670d84
DA
3922 if (rt->fib6_nh.nh_dev == down_dev ||
3923 rt->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed 3924 dead++;
93c2fb25 3925 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84
DA
3926 if (iter->fib6_nh.nh_dev == down_dev ||
3927 iter->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed
IS
3928 dead++;
3929
3930 return dead;
3931}
3932
8d1c802b 3933static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
1de178ed
IS
3934 const struct net_device *dev,
3935 unsigned int nh_flags)
3936{
8d1c802b 3937 struct fib6_info *iter;
1de178ed 3938
5e670d84
DA
3939 if (rt->fib6_nh.nh_dev == dev)
3940 rt->fib6_nh.nh_flags |= nh_flags;
93c2fb25 3941 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84
DA
3942 if (iter->fib6_nh.nh_dev == dev)
3943 iter->fib6_nh.nh_flags |= nh_flags;
1de178ed
IS
3944}
3945
a1a22c12 3946/* called with write lock held for table with rt */
8d1c802b 3947static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
1da177e4 3948{
4c981e28
IS
3949 const struct arg_netdev_event *arg = p_arg;
3950 const struct net_device *dev = arg->dev;
7aef6859 3951 struct net *net = dev_net(dev);
8ed67789 3952
421842ed 3953 if (rt == net->ipv6.fib6_null_entry)
27c6fa73
IS
3954 return 0;
3955
3956 switch (arg->event) {
3957 case NETDEV_UNREGISTER:
5e670d84 3958 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
27c6fa73 3959 case NETDEV_DOWN:
1de178ed 3960 if (rt->should_flush)
27c6fa73 3961 return -1;
93c2fb25 3962 if (!rt->fib6_nsiblings)
5e670d84 3963 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
1de178ed
IS
3964 if (rt6_multipath_uses_dev(rt, dev)) {
3965 unsigned int count;
3966
3967 count = rt6_multipath_dead_count(rt, dev);
93c2fb25 3968 if (rt->fib6_nsiblings + 1 == count) {
1de178ed
IS
3969 rt6_multipath_flush(rt);
3970 return -1;
3971 }
3972 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
3973 RTNH_F_LINKDOWN);
7aef6859 3974 fib6_update_sernum(net, rt);
d7dedee1 3975 rt6_multipath_rebalance(rt);
1de178ed
IS
3976 }
3977 return -2;
27c6fa73 3978 case NETDEV_CHANGE:
5e670d84 3979 if (rt->fib6_nh.nh_dev != dev ||
93c2fb25 3980 rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73 3981 break;
5e670d84 3982 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 3983 rt6_multipath_rebalance(rt);
27c6fa73 3984 break;
2b241361 3985 }
c159d30c 3986
1da177e4
LT
3987 return 0;
3988}
3989
27c6fa73 3990void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 3991{
4c981e28 3992 struct arg_netdev_event arg = {
8ed67789 3993 .dev = dev,
6802f3ad
IS
3994 {
3995 .event = event,
3996 },
8ed67789
DL
3997 };
3998
4c981e28
IS
3999 fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
4000}
4001
4002void rt6_disable_ip(struct net_device *dev, unsigned long event)
4003{
4004 rt6_sync_down_dev(dev, event);
4005 rt6_uncached_list_flush_dev(dev_net(dev), dev);
4006 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
4007}
4008
/* Argument block passed through fib6_clean_all() to rt6_mtu_change_route() */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
4013
8d1c802b 4014static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4015{
4016 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4017 struct inet6_dev *idev;
4018
4019 /* In IPv6 pmtu discovery is not optional,
4020 so that RTAX_MTU lock cannot disable it.
4021 We still use this lock to block changes
4022 caused by addrconf/ndisc.
4023 */
4024
4025 idev = __in6_dev_get(arg->dev);
38308473 4026 if (!idev)
1da177e4
LT
4027 return 0;
4028
4029 /* For administrative MTU increase, there is no way to discover
4030 IPv6 PMTU increase, so PMTU increase should be updated here.
4031 Since RFC 1981 doesn't include administrative MTU increase
4032 update PMTU increase is a MUST. (i.e. jumbo frame)
4033 */
5e670d84 4034 if (rt->fib6_nh.nh_dev == arg->dev &&
d4ead6b3
DA
4035 !fib6_metric_locked(rt, RTAX_MTU)) {
4036 u32 mtu = rt->fib6_pmtu;
4037
4038 if (mtu >= arg->mtu ||
4039 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4040 fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4041
f5bbe7ee 4042 spin_lock_bh(&rt6_exception_lock);
e9fa1495 4043 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
f5bbe7ee 4044 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 4045 }
1da177e4
LT
4046 return 0;
4047}
4048
95c96174 4049void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 4050{
c71099ac
TG
4051 struct rt6_mtu_change_arg arg = {
4052 .dev = dev,
4053 .mtu = mtu,
4054 };
1da177e4 4055
0c3584d5 4056 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
4057}
4058
/* Netlink attribute policy applied by nlmsg_parse() in
 * rtm_to_fib6_config() and inet6_rtm_getroute().
 *
 * RTA_DST and RTA_SRC have no entry here; both users check the length of
 * those attributes themselves.
 */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_PREFSRC]		= { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_PREF]              = { .type = NLA_U8 },
	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
	[RTA_ENCAP]		= { .type = NLA_NESTED },
	[RTA_EXPIRES]		= { .type = NLA_U32 },
	[RTA_UID]		= { .type = NLA_U32 },
	[RTA_MARK]		= { .type = NLA_U32 },
	[RTA_TABLE]		= { .type = NLA_U32 },
};
4075
4076static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
4077 struct fib6_config *cfg,
4078 struct netlink_ext_ack *extack)
1da177e4 4079{
86872cb5
TG
4080 struct rtmsg *rtm;
4081 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 4082 unsigned int pref;
86872cb5 4083 int err;
1da177e4 4084
fceb6435
JB
4085 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4086 NULL);
86872cb5
TG
4087 if (err < 0)
4088 goto errout;
1da177e4 4089
86872cb5
TG
4090 err = -EINVAL;
4091 rtm = nlmsg_data(nlh);
4092 memset(cfg, 0, sizeof(*cfg));
4093
4094 cfg->fc_table = rtm->rtm_table;
4095 cfg->fc_dst_len = rtm->rtm_dst_len;
4096 cfg->fc_src_len = rtm->rtm_src_len;
4097 cfg->fc_flags = RTF_UP;
4098 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 4099 cfg->fc_type = rtm->rtm_type;
86872cb5 4100
ef2c7d7b
ND
4101 if (rtm->rtm_type == RTN_UNREACHABLE ||
4102 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
4103 rtm->rtm_type == RTN_PROHIBIT ||
4104 rtm->rtm_type == RTN_THROW)
86872cb5
TG
4105 cfg->fc_flags |= RTF_REJECT;
4106
ab79ad14
4107 if (rtm->rtm_type == RTN_LOCAL)
4108 cfg->fc_flags |= RTF_LOCAL;
4109
1f56a01f
MKL
4110 if (rtm->rtm_flags & RTM_F_CLONED)
4111 cfg->fc_flags |= RTF_CACHE;
4112
fc1e64e1
DA
4113 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4114
15e47304 4115 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 4116 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 4117 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
4118
4119 if (tb[RTA_GATEWAY]) {
67b61f6c 4120 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 4121 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 4122 }
86872cb5
TG
4123
4124 if (tb[RTA_DST]) {
4125 int plen = (rtm->rtm_dst_len + 7) >> 3;
4126
4127 if (nla_len(tb[RTA_DST]) < plen)
4128 goto errout;
4129
4130 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 4131 }
86872cb5
TG
4132
4133 if (tb[RTA_SRC]) {
4134 int plen = (rtm->rtm_src_len + 7) >> 3;
4135
4136 if (nla_len(tb[RTA_SRC]) < plen)
4137 goto errout;
4138
4139 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 4140 }
86872cb5 4141
c3968a85 4142 if (tb[RTA_PREFSRC])
67b61f6c 4143 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 4144
86872cb5
TG
4145 if (tb[RTA_OIF])
4146 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4147
4148 if (tb[RTA_PRIORITY])
4149 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4150
4151 if (tb[RTA_METRICS]) {
4152 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4153 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 4154 }
86872cb5
TG
4155
4156 if (tb[RTA_TABLE])
4157 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4158
51ebd318
ND
4159 if (tb[RTA_MULTIPATH]) {
4160 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4161 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
4162
4163 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 4164 cfg->fc_mp_len, extack);
9ed59592
DA
4165 if (err < 0)
4166 goto errout;
51ebd318
ND
4167 }
4168
c78ba6d6
LR
4169 if (tb[RTA_PREF]) {
4170 pref = nla_get_u8(tb[RTA_PREF]);
4171 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4172 pref != ICMPV6_ROUTER_PREF_HIGH)
4173 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4174 cfg->fc_flags |= RTF_PREF(pref);
4175 }
4176
19e42e45
RP
4177 if (tb[RTA_ENCAP])
4178 cfg->fc_encap = tb[RTA_ENCAP];
4179
9ed59592 4180 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
4181 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4182
c255bd68 4183 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
4184 if (err < 0)
4185 goto errout;
4186 }
4187
32bc201e
XL
4188 if (tb[RTA_EXPIRES]) {
4189 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4190
4191 if (addrconf_finite_timeout(timeout)) {
4192 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4193 cfg->fc_flags |= RTF_EXPIRES;
4194 }
4195 }
4196
86872cb5
TG
4197 err = 0;
4198errout:
4199 return err;
1da177e4
LT
4200}
4201
/* One parsed nexthop of a multipath request, queued on a local list by
 * ip6_route_info_append() until ip6_route_multipath_add() inserts it.
 */
struct rt6_nh {
	struct fib6_info *fib6_info;	/* route built for this nexthop; NULLed once inserted */
	struct fib6_config r_cfg;	/* copy of the per-nexthop config, used for rollback */
	struct list_head next;		/* link in the caller's rt6_nh_list */
};
4207
4208static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4209{
4210 struct rt6_nh *nh;
4211
4212 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 4213 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
4214 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4215 nh->r_cfg.fc_ifindex);
4216 }
4217}
4218
d4ead6b3
DA
4219static int ip6_route_info_append(struct net *net,
4220 struct list_head *rt6_nh_list,
8d1c802b
DA
4221 struct fib6_info *rt,
4222 struct fib6_config *r_cfg)
6b9ea5a6
RP
4223{
4224 struct rt6_nh *nh;
6b9ea5a6
RP
4225 int err = -EEXIST;
4226
4227 list_for_each_entry(nh, rt6_nh_list, next) {
8d1c802b
DA
4228 /* check if fib6_info already exists */
4229 if (rt6_duplicate_nexthop(nh->fib6_info, rt))
6b9ea5a6
RP
4230 return err;
4231 }
4232
4233 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4234 if (!nh)
4235 return -ENOMEM;
8d1c802b 4236 nh->fib6_info = rt;
d4ead6b3 4237 err = ip6_convert_metrics(net, rt, r_cfg);
6b9ea5a6
RP
4238 if (err) {
4239 kfree(nh);
4240 return err;
4241 }
4242 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4243 list_add_tail(&nh->next, rt6_nh_list);
4244
4245 return 0;
4246}
4247
8d1c802b
DA
4248static void ip6_route_mpath_notify(struct fib6_info *rt,
4249 struct fib6_info *rt_last,
3b1137fe
DA
4250 struct nl_info *info,
4251 __u16 nlflags)
4252{
4253 /* if this is an APPEND route, then rt points to the first route
4254 * inserted and rt_last points to last route inserted. Userspace
4255 * wants a consistent dump of the route which starts at the first
4256 * nexthop. Since sibling routes are always added at the end of
4257 * the list, find the first sibling of the last route appended
4258 */
93c2fb25
DA
4259 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4260 rt = list_first_entry(&rt_last->fib6_siblings,
8d1c802b 4261 struct fib6_info,
93c2fb25 4262 fib6_siblings);
3b1137fe
DA
4263 }
4264
4265 if (rt)
4266 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4267}
4268
333c4301
DA
4269static int ip6_route_multipath_add(struct fib6_config *cfg,
4270 struct netlink_ext_ack *extack)
51ebd318 4271{
8d1c802b 4272 struct fib6_info *rt_notif = NULL, *rt_last = NULL;
3b1137fe 4273 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4274 struct fib6_config r_cfg;
4275 struct rtnexthop *rtnh;
8d1c802b 4276 struct fib6_info *rt;
6b9ea5a6
RP
4277 struct rt6_nh *err_nh;
4278 struct rt6_nh *nh, *nh_safe;
3b1137fe 4279 __u16 nlflags;
51ebd318
ND
4280 int remaining;
4281 int attrlen;
6b9ea5a6
RP
4282 int err = 1;
4283 int nhn = 0;
4284 int replace = (cfg->fc_nlinfo.nlh &&
4285 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4286 LIST_HEAD(rt6_nh_list);
51ebd318 4287
3b1137fe
DA
4288 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4289 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4290 nlflags |= NLM_F_APPEND;
4291
35f1b4e9 4292 remaining = cfg->fc_mp_len;
51ebd318 4293 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4294
6b9ea5a6 4295 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
8d1c802b 4296 * fib6_info structs per nexthop
6b9ea5a6 4297 */
51ebd318
ND
4298 while (rtnh_ok(rtnh, remaining)) {
4299 memcpy(&r_cfg, cfg, sizeof(*cfg));
4300 if (rtnh->rtnh_ifindex)
4301 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4302
4303 attrlen = rtnh_attrlen(rtnh);
4304 if (attrlen > 0) {
4305 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4306
4307 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4308 if (nla) {
67b61f6c 4309 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4310 r_cfg.fc_flags |= RTF_GATEWAY;
4311 }
19e42e45
RP
4312 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4313 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4314 if (nla)
4315 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4316 }
6b9ea5a6 4317
68e2ffde 4318 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
acb54e3c 4319 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
8c5b83f0
RP
4320 if (IS_ERR(rt)) {
4321 err = PTR_ERR(rt);
4322 rt = NULL;
6b9ea5a6 4323 goto cleanup;
8c5b83f0 4324 }
6b9ea5a6 4325
5e670d84 4326 rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
398958ae 4327
d4ead6b3
DA
4328 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4329 rt, &r_cfg);
51ebd318 4330 if (err) {
93531c67 4331 fib6_info_release(rt);
6b9ea5a6
RP
4332 goto cleanup;
4333 }
4334
4335 rtnh = rtnh_next(rtnh, &remaining);
4336 }
4337
3b1137fe
DA
4338 /* for add and replace send one notification with all nexthops.
4339 * Skip the notification in fib6_add_rt2node and send one with
4340 * the full route when done
4341 */
4342 info->skip_notify = 1;
4343
6b9ea5a6
RP
4344 err_nh = NULL;
4345 list_for_each_entry(nh, &rt6_nh_list, next) {
8d1c802b
DA
4346 rt_last = nh->fib6_info;
4347 err = __ip6_ins_rt(nh->fib6_info, info, extack);
4348 fib6_info_release(nh->fib6_info);
93531c67 4349
3b1137fe
DA
4350 /* save reference to first route for notification */
4351 if (!rt_notif && !err)
8d1c802b 4352 rt_notif = nh->fib6_info;
3b1137fe 4353
8d1c802b
DA
4354 /* nh->fib6_info is used or freed at this point, reset to NULL*/
4355 nh->fib6_info = NULL;
6b9ea5a6
RP
4356 if (err) {
4357 if (replace && nhn)
4358 ip6_print_replace_route_err(&rt6_nh_list);
4359 err_nh = nh;
4360 goto add_errout;
51ebd318 4361 }
6b9ea5a6 4362
1a72418b 4363 /* Because each route is added like a single route we remove
27596472
MK
4364 * these flags after the first nexthop: if there is a collision,
4365 * we have already failed to add the first nexthop:
4366 * fib6_add_rt2node() has rejected it; when replacing, old
4367 * nexthops have been replaced by first new, the rest should
4368 * be added to it.
1a72418b 4369 */
27596472
MK
4370 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4371 NLM_F_REPLACE);
6b9ea5a6
RP
4372 nhn++;
4373 }
4374
3b1137fe
DA
4375 /* success ... tell user about new route */
4376 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4377 goto cleanup;
4378
4379add_errout:
3b1137fe
DA
4380 /* send notification for routes that were added so that
4381 * the delete notifications sent by ip6_route_del are
4382 * coherent
4383 */
4384 if (rt_notif)
4385 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4386
6b9ea5a6
RP
4387 /* Delete routes that were already added */
4388 list_for_each_entry(nh, &rt6_nh_list, next) {
4389 if (err_nh == nh)
4390 break;
333c4301 4391 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4392 }
4393
4394cleanup:
4395 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
8d1c802b
DA
4396 if (nh->fib6_info)
4397 fib6_info_release(nh->fib6_info);
6b9ea5a6
RP
4398 list_del(&nh->next);
4399 kfree(nh);
4400 }
4401
4402 return err;
4403}
4404
333c4301
DA
4405static int ip6_route_multipath_del(struct fib6_config *cfg,
4406 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4407{
4408 struct fib6_config r_cfg;
4409 struct rtnexthop *rtnh;
4410 int remaining;
4411 int attrlen;
4412 int err = 1, last_err = 0;
4413
4414 remaining = cfg->fc_mp_len;
4415 rtnh = (struct rtnexthop *)cfg->fc_mp;
4416
4417 /* Parse a Multipath Entry */
4418 while (rtnh_ok(rtnh, remaining)) {
4419 memcpy(&r_cfg, cfg, sizeof(*cfg));
4420 if (rtnh->rtnh_ifindex)
4421 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4422
4423 attrlen = rtnh_attrlen(rtnh);
4424 if (attrlen > 0) {
4425 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4426
4427 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4428 if (nla) {
4429 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4430 r_cfg.fc_flags |= RTF_GATEWAY;
4431 }
4432 }
333c4301 4433 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4434 if (err)
4435 last_err = err;
4436
51ebd318
ND
4437 rtnh = rtnh_next(rtnh, &remaining);
4438 }
4439
4440 return last_err;
4441}
4442
c21ef3e3
DA
4443static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4444 struct netlink_ext_ack *extack)
1da177e4 4445{
86872cb5
TG
4446 struct fib6_config cfg;
4447 int err;
1da177e4 4448
333c4301 4449 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4450 if (err < 0)
4451 return err;
4452
51ebd318 4453 if (cfg.fc_mp)
333c4301 4454 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4455 else {
4456 cfg.fc_delete_all_nh = 1;
333c4301 4457 return ip6_route_del(&cfg, extack);
0ae81335 4458 }
1da177e4
LT
4459}
4460
c21ef3e3
DA
4461static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4462 struct netlink_ext_ack *extack)
1da177e4 4463{
86872cb5
TG
4464 struct fib6_config cfg;
4465 int err;
1da177e4 4466
333c4301 4467 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4468 if (err < 0)
4469 return err;
4470
51ebd318 4471 if (cfg.fc_mp)
333c4301 4472 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4473 else
acb54e3c 4474 return ip6_route_add(&cfg, GFP_KERNEL, extack);
1da177e4
LT
4475}
4476
8d1c802b 4477static size_t rt6_nlmsg_size(struct fib6_info *rt)
339bf98f 4478{
beb1afac
DA
4479 int nexthop_len = 0;
4480
93c2fb25 4481 if (rt->fib6_nsiblings) {
beb1afac
DA
4482 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4483 + NLA_ALIGN(sizeof(struct rtnexthop))
4484 + nla_total_size(16) /* RTA_GATEWAY */
5e670d84 4485 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
beb1afac 4486
93c2fb25 4487 nexthop_len *= rt->fib6_nsiblings;
beb1afac
DA
4488 }
4489
339bf98f
TG
4490 return NLMSG_ALIGN(sizeof(struct rtmsg))
4491 + nla_total_size(16) /* RTA_SRC */
4492 + nla_total_size(16) /* RTA_DST */
4493 + nla_total_size(16) /* RTA_GATEWAY */
4494 + nla_total_size(16) /* RTA_PREFSRC */
4495 + nla_total_size(4) /* RTA_TABLE */
4496 + nla_total_size(4) /* RTA_IIF */
4497 + nla_total_size(4) /* RTA_OIF */
4498 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4499 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4500 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4501 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4502 + nla_total_size(1) /* RTA_PREF */
5e670d84 4503 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
beb1afac
DA
4504 + nexthop_len;
4505}
4506
8d1c802b 4507static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
5be083ce 4508 unsigned int *flags, bool skip_oif)
beb1afac 4509{
5e670d84 4510 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
f9d882ea
IS
4511 *flags |= RTNH_F_DEAD;
4512
5e670d84 4513 if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
beb1afac 4514 *flags |= RTNH_F_LINKDOWN;
dcd1f572
DA
4515
4516 rcu_read_lock();
4517 if (fib6_ignore_linkdown(rt))
beb1afac 4518 *flags |= RTNH_F_DEAD;
dcd1f572 4519 rcu_read_unlock();
beb1afac
DA
4520 }
4521
93c2fb25 4522 if (rt->fib6_flags & RTF_GATEWAY) {
5e670d84 4523 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
beb1afac
DA
4524 goto nla_put_failure;
4525 }
4526
5e670d84
DA
4527 *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
4528 if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
4529 *flags |= RTNH_F_OFFLOAD;
4530
5be083ce 4531 /* not needed for multipath encoding b/c it has a rtnexthop struct */
5e670d84
DA
4532 if (!skip_oif && rt->fib6_nh.nh_dev &&
4533 nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
beb1afac
DA
4534 goto nla_put_failure;
4535
5e670d84
DA
4536 if (rt->fib6_nh.nh_lwtstate &&
4537 lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
beb1afac
DA
4538 goto nla_put_failure;
4539
4540 return 0;
4541
4542nla_put_failure:
4543 return -EMSGSIZE;
4544}
4545
5be083ce 4546/* add multipath next hop */
8d1c802b 4547static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
beb1afac 4548{
5e670d84 4549 const struct net_device *dev = rt->fib6_nh.nh_dev;
beb1afac
DA
4550 struct rtnexthop *rtnh;
4551 unsigned int flags = 0;
4552
4553 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4554 if (!rtnh)
4555 goto nla_put_failure;
4556
5e670d84
DA
4557 rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
4558 rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
beb1afac 4559
5be083ce 4560 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
4561 goto nla_put_failure;
4562
4563 rtnh->rtnh_flags = flags;
4564
4565 /* length of rtnetlink header + attributes */
4566 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4567
4568 return 0;
4569
4570nla_put_failure:
4571 return -EMSGSIZE;
339bf98f
TG
4572}
4573
/* Build one complete route netlink message for @rt in @skb.
 *
 * @net:    network namespace, used for the saddr and mroute lookups
 * @skb:    message buffer being filled
 * @rt:     FIB entry to describe; dereferenced unconditionally
 * @dst:    optional dst entry; when set, metrics, expiry and error are
 *          taken from it instead of from @rt
 * @dest:   optional destination address; when set it is reported as a
 *          /128 RTA_DST instead of the route's own prefix
 * @src:    optional source address, same treatment for RTA_SRC (only
 *          with CONFIG_IPV6_SUBTREES)
 * @iif:    input interface index, 0 when there is none
 * @type, @portid, @seq, @flags: passed to nlmsg_put() for the header
 *
 * Returns 0 on success. On any attribute failure the partially built
 * message is cancelled and -EMSGSIZE is returned.
 */
static int rt6_fill_node(struct net *net, struct sk_buff *skb,
			 struct fib6_info *rt, struct dst_entry *dst,
			 struct in6_addr *dest, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires = 0;
	u32 *pmetrics;
	u32 table;

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->fib6_dst.plen;
	rtm->rtm_src_len = rt->fib6_src.plen;
	rtm->rtm_tos = 0;
	if (rt->fib6_table)
		table = rt->fib6_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	/* the table id goes both into the header and into RTA_TABLE */
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;

	rtm->rtm_type = rt->fib6_type;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->fib6_protocol;

	if (rt->fib6_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	/* an explicit @dest overrides the route prefix and is a host route */
	if (dest) {
		if (nla_put_in6_addr(skb, RTA_DST, dest))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put_in6_addr(skb, RTA_DST, &rt->fib6_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put_in6_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put_in6_addr(skb, RTA_SRC, &rt->fib6_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->fib6_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, portid);

			/* 0 returns right away, without reaching the
			 * nlmsg_end() below; a positive value falls through
			 */
			if (err == 0)
				return 0;
			if (err < 0)
				goto nla_put_failure;
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dest) {
		/* no input interface: report the source address that would
		 * be selected for @dest, if one can be found
		 */
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	if (rt->fib6_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->fib6_prefsrc.addr;
		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	/* metrics come from the dst when one was supplied */
	pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
	if (rtnetlink_put_metrics(skb, pmetrics) < 0)
		goto nla_put_failure;

	if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
		goto nla_put_failure;

	/* For multipath routes, walk the siblings list and add
	 * each as a nexthop within RTA_MULTIPATH.
	 */
	if (rt->fib6_nsiblings) {
		struct fib6_info *sibling, *next_sibling;
		struct nlattr *mp;

		mp = nla_nest_start(skb, RTA_MULTIPATH);
		if (!mp)
			goto nla_put_failure;

		/* @rt itself is the first nexthop of the nest */
		if (rt6_add_nexthop(skb, rt) < 0)
			goto nla_put_failure;

		list_for_each_entry_safe(sibling, next_sibling,
					 &rt->fib6_siblings, fib6_siblings) {
			if (rt6_add_nexthop(skb, sibling) < 0)
				goto nla_put_failure;
		}

		nla_nest_end(skb, mp);
	} else {
		if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
			goto nla_put_failure;
	}

	/* remaining lifetime in jiffies, relative to now */
	if (rt->fib6_flags & RTF_EXPIRES) {
		expires = dst ? dst->expires : rt->expires;
		expires -= jiffies;
	}

	if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
		goto nla_put_failure;

	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->fib6_flags)))
		goto nla_put_failure;


	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
4706
8d1c802b 4707int rt6_dump_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4708{
4709 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1f17e2f2
DA
4710 struct net *net = arg->net;
4711
421842ed 4712 if (rt == net->ipv6.fib6_null_entry)
1f17e2f2 4713 return 0;
1da177e4 4714
2d7202bf
TG
4715 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4716 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
f8cfe2ce
DA
4717
4718 /* user wants prefix routes only */
4719 if (rtm->rtm_flags & RTM_F_PREFIX &&
93c2fb25 4720 !(rt->fib6_flags & RTF_PREFIX_RT)) {
f8cfe2ce
DA
4721 /* success since this is not a prefix route */
4722 return 1;
4723 }
4724 }
1da177e4 4725
d4ead6b3
DA
4726 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4727 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
4728 arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
1da177e4
LT
4729}
4730
c21ef3e3
DA
4731static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4732 struct netlink_ext_ack *extack)
1da177e4 4733{
3b1e0a65 4734 struct net *net = sock_net(in_skb->sk);
ab364a6f 4735 struct nlattr *tb[RTA_MAX+1];
18c3a61c 4736 int err, iif = 0, oif = 0;
a68886a6 4737 struct fib6_info *from;
18c3a61c 4738 struct dst_entry *dst;
ab364a6f 4739 struct rt6_info *rt;
1da177e4 4740 struct sk_buff *skb;
ab364a6f 4741 struct rtmsg *rtm;
4c9483b2 4742 struct flowi6 fl6;
18c3a61c 4743 bool fibmatch;
1da177e4 4744
fceb6435 4745 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 4746 extack);
ab364a6f
TG
4747 if (err < 0)
4748 goto errout;
1da177e4 4749
ab364a6f 4750 err = -EINVAL;
4c9483b2 4751 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
4752 rtm = nlmsg_data(nlh);
4753 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4754 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4755
ab364a6f
TG
4756 if (tb[RTA_SRC]) {
4757 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4758 goto errout;
4759
4e3fd7a0 4760 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4761 }
4762
4763 if (tb[RTA_DST]) {
4764 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4765 goto errout;
4766
4e3fd7a0 4767 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4768 }
4769
4770 if (tb[RTA_IIF])
4771 iif = nla_get_u32(tb[RTA_IIF]);
4772
4773 if (tb[RTA_OIF])
72331bc0 4774 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4775
2e47b291
LC
4776 if (tb[RTA_MARK])
4777 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4778
622ec2c9
LC
4779 if (tb[RTA_UID])
4780 fl6.flowi6_uid = make_kuid(current_user_ns(),
4781 nla_get_u32(tb[RTA_UID]));
4782 else
4783 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4784
1da177e4
LT
4785 if (iif) {
4786 struct net_device *dev;
72331bc0
SL
4787 int flags = 0;
4788
121622db
FW
4789 rcu_read_lock();
4790
4791 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4792 if (!dev) {
121622db 4793 rcu_read_unlock();
1da177e4 4794 err = -ENODEV;
ab364a6f 4795 goto errout;
1da177e4 4796 }
72331bc0
SL
4797
4798 fl6.flowi6_iif = iif;
4799
4800 if (!ipv6_addr_any(&fl6.saddr))
4801 flags |= RT6_LOOKUP_F_HAS_SADDR;
4802
b75cc8f9 4803 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
121622db
FW
4804
4805 rcu_read_unlock();
72331bc0
SL
4806 } else {
4807 fl6.flowi6_oif = oif;
4808
58acfd71 4809 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
4810 }
4811
18c3a61c
RP
4812
4813 rt = container_of(dst, struct rt6_info, dst);
4814 if (rt->dst.error) {
4815 err = rt->dst.error;
4816 ip6_rt_put(rt);
4817 goto errout;
1da177e4
LT
4818 }
4819
9d6acb3b
WC
4820 if (rt == net->ipv6.ip6_null_entry) {
4821 err = rt->dst.error;
4822 ip6_rt_put(rt);
4823 goto errout;
4824 }
4825
ab364a6f 4826 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4827 if (!skb) {
94e187c0 4828 ip6_rt_put(rt);
ab364a6f
TG
4829 err = -ENOBUFS;
4830 goto errout;
4831 }
1da177e4 4832
d8d1f30b 4833 skb_dst_set(skb, &rt->dst);
a68886a6
DA
4834
4835 rcu_read_lock();
4836 from = rcu_dereference(rt->from);
4837
18c3a61c 4838 if (fibmatch)
a68886a6 4839 err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
18c3a61c
RP
4840 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4841 nlh->nlmsg_seq, 0);
4842 else
a68886a6
DA
4843 err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
4844 &fl6.saddr, iif, RTM_NEWROUTE,
d4ead6b3
DA
4845 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
4846 0);
a68886a6
DA
4847 rcu_read_unlock();
4848
1da177e4 4849 if (err < 0) {
ab364a6f
TG
4850 kfree_skb(skb);
4851 goto errout;
1da177e4
LT
4852 }
4853
15e47304 4854 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 4855errout:
1da177e4 4856 return err;
1da177e4
LT
4857}
4858
8d1c802b 4859void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
37a1d361 4860 unsigned int nlm_flags)
1da177e4
LT
4861{
4862 struct sk_buff *skb;
5578689a 4863 struct net *net = info->nl_net;
528c4ceb
DL
4864 u32 seq;
4865 int err;
4866
4867 err = -ENOBUFS;
38308473 4868 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 4869
19e42e45 4870 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 4871 if (!skb)
21713ebc
TG
4872 goto errout;
4873
d4ead6b3
DA
4874 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
4875 event, info->portid, seq, nlm_flags);
26932566
PM
4876 if (err < 0) {
4877 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4878 WARN_ON(err == -EMSGSIZE);
4879 kfree_skb(skb);
4880 goto errout;
4881 }
15e47304 4882 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
4883 info->nlh, gfp_any());
4884 return;
21713ebc
TG
4885errout:
4886 if (err < 0)
5578689a 4887 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
4888}
4889
8ed67789 4890static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 4891 unsigned long event, void *ptr)
8ed67789 4892{
351638e7 4893 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 4894 struct net *net = dev_net(dev);
8ed67789 4895
242d3a49
WC
4896 if (!(dev->flags & IFF_LOOPBACK))
4897 return NOTIFY_OK;
4898
4899 if (event == NETDEV_REGISTER) {
421842ed 4900 net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
d8d1f30b 4901 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
4902 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
4903#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 4904 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 4905 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 4906 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 4907 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 4908#endif
76da0704
WC
4909 } else if (event == NETDEV_UNREGISTER &&
4910 dev->reg_state != NETREG_UNREGISTERED) {
4911 /* NETDEV_UNREGISTER could be fired for multiple times by
4912 * netdev_wait_allrefs(). Make sure we only call this once.
4913 */
12d94a80 4914 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 4915#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
4916 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
4917 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
4918#endif
4919 }
4920
4921 return NOTIFY_OK;
4922}
4923
1da177e4
LT
4924/*
4925 * /proc
4926 */
4927
4928#ifdef CONFIG_PROC_FS
4929
/* File operations for the IPv6 route /proc entry: a seq_file opened via
 * ipv6_route_open() and released with seq_release_net().
 */
static const struct file_operations ipv6_route_proc_fops = {
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
4936
1da177e4
LT
4937static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4938{
69ddb805 4939 struct net *net = (struct net *)seq->private;
1da177e4 4940 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
4941 net->ipv6.rt6_stats->fib_nodes,
4942 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 4943 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
4944 net->ipv6.rt6_stats->fib_rt_entries,
4945 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 4946 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 4947 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
4948
4949 return 0;
4950}
4951
4952static int rt6_stats_seq_open(struct inode *inode, struct file *file)
4953{
de05c557 4954 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
4955}
4956
9a32144e 4957static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
4958 .open = rt6_stats_seq_open,
4959 .read = seq_read,
4960 .llseek = seq_lseek,
b6fcbdb4 4961 .release = single_release_net,
1da177e4
LT
4962};
4963#endif /* CONFIG_PROC_FS */
4964
4965#ifdef CONFIG_SYSCTL
4966
1da177e4 4967static
fe2c6338 4968int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
4969 void __user *buffer, size_t *lenp, loff_t *ppos)
4970{
c486da34
LAG
4971 struct net *net;
4972 int delay;
4973 if (!write)
1da177e4 4974 return -EINVAL;
c486da34
LAG
4975
4976 net = (struct net *)ctl->extra1;
4977 delay = net->ipv6.sysctl.flush_delay;
4978 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 4979 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 4980 return 0;
1da177e4
LT
4981}
4982
fe2c6338 4983struct ctl_table ipv6_route_table_template[] = {
1ab1457c 4984 {
1da177e4 4985 .procname = "flush",
4990509f 4986 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 4987 .maxlen = sizeof(int),
89c8b3a1 4988 .mode = 0200,
6d9f239a 4989 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
4990 },
4991 {
1da177e4 4992 .procname = "gc_thresh",
9a7ec3a9 4993 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
4994 .maxlen = sizeof(int),
4995 .mode = 0644,
6d9f239a 4996 .proc_handler = proc_dointvec,
1da177e4
LT
4997 },
4998 {
1da177e4 4999 .procname = "max_size",
4990509f 5000 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
5001 .maxlen = sizeof(int),
5002 .mode = 0644,
6d9f239a 5003 .proc_handler = proc_dointvec,
1da177e4
LT
5004 },
5005 {
1da177e4 5006 .procname = "gc_min_interval",
4990509f 5007 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5008 .maxlen = sizeof(int),
5009 .mode = 0644,
6d9f239a 5010 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5011 },
5012 {
1da177e4 5013 .procname = "gc_timeout",
4990509f 5014 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
5015 .maxlen = sizeof(int),
5016 .mode = 0644,
6d9f239a 5017 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5018 },
5019 {
1da177e4 5020 .procname = "gc_interval",
4990509f 5021 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
5022 .maxlen = sizeof(int),
5023 .mode = 0644,
6d9f239a 5024 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5025 },
5026 {
1da177e4 5027 .procname = "gc_elasticity",
4990509f 5028 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
5029 .maxlen = sizeof(int),
5030 .mode = 0644,
f3d3f616 5031 .proc_handler = proc_dointvec,
1da177e4
LT
5032 },
5033 {
1da177e4 5034 .procname = "mtu_expires",
4990509f 5035 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
5036 .maxlen = sizeof(int),
5037 .mode = 0644,
6d9f239a 5038 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5039 },
5040 {
1da177e4 5041 .procname = "min_adv_mss",
4990509f 5042 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
5043 .maxlen = sizeof(int),
5044 .mode = 0644,
f3d3f616 5045 .proc_handler = proc_dointvec,
1da177e4
LT
5046 },
5047 {
1da177e4 5048 .procname = "gc_min_interval_ms",
4990509f 5049 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5050 .maxlen = sizeof(int),
5051 .mode = 0644,
6d9f239a 5052 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 5053 },
f8572d8f 5054 { }
1da177e4
LT
5055};
5056
2c8c1e72 5057struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
5058{
5059 struct ctl_table *table;
5060
5061 table = kmemdup(ipv6_route_table_template,
5062 sizeof(ipv6_route_table_template),
5063 GFP_KERNEL);
5ee09105
YH
5064
5065 if (table) {
5066 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 5067 table[0].extra1 = net;
86393e52 5068 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
5069 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5070 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5071 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5072 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5073 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5074 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5075 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 5076 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
5077
5078 /* Don't export sysctls to unprivileged users */
5079 if (net->user_ns != &init_user_ns)
5080 table[0].procname = NULL;
5ee09105
YH
5081 }
5082
760f2d01
DL
5083 return table;
5084}
1da177e4
LT
5085#endif
5086
2c8c1e72 5087static int __net_init ip6_route_net_init(struct net *net)
cdb18761 5088{
633d424b 5089 int ret = -ENOMEM;
8ed67789 5090
86393e52
AD
5091 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5092 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 5093
fc66f95c
ED
5094 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5095 goto out_ip6_dst_ops;
5096
421842ed
DA
5097 net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5098 sizeof(*net->ipv6.fib6_null_entry),
5099 GFP_KERNEL);
5100 if (!net->ipv6.fib6_null_entry)
5101 goto out_ip6_dst_entries;
5102
8ed67789
DL
5103 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5104 sizeof(*net->ipv6.ip6_null_entry),
5105 GFP_KERNEL);
5106 if (!net->ipv6.ip6_null_entry)
421842ed 5107 goto out_fib6_null_entry;
d8d1f30b 5108 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5109 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5110 ip6_template_metrics, true);
8ed67789
DL
5111
5112#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 5113 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
5114 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5115 sizeof(*net->ipv6.ip6_prohibit_entry),
5116 GFP_KERNEL);
68fffc67
PZ
5117 if (!net->ipv6.ip6_prohibit_entry)
5118 goto out_ip6_null_entry;
d8d1f30b 5119 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5120 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5121 ip6_template_metrics, true);
8ed67789
DL
5122
5123 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5124 sizeof(*net->ipv6.ip6_blk_hole_entry),
5125 GFP_KERNEL);
68fffc67
PZ
5126 if (!net->ipv6.ip6_blk_hole_entry)
5127 goto out_ip6_prohibit_entry;
d8d1f30b 5128 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5129 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5130 ip6_template_metrics, true);
8ed67789
DL
5131#endif
5132
b339a47c
PZ
5133 net->ipv6.sysctl.flush_delay = 0;
5134 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5135 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5136 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5137 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5138 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5139 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5140 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5141
6891a346
BT
5142 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5143
8ed67789
DL
5144 ret = 0;
5145out:
5146 return ret;
f2fc6a54 5147
68fffc67
PZ
5148#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5149out_ip6_prohibit_entry:
5150 kfree(net->ipv6.ip6_prohibit_entry);
5151out_ip6_null_entry:
5152 kfree(net->ipv6.ip6_null_entry);
5153#endif
421842ed
DA
5154out_fib6_null_entry:
5155 kfree(net->ipv6.fib6_null_entry);
fc66f95c
ED
5156out_ip6_dst_entries:
5157 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 5158out_ip6_dst_ops:
f2fc6a54 5159 goto out;
cdb18761
DL
5160}
5161
2c8c1e72 5162static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 5163{
421842ed 5164 kfree(net->ipv6.fib6_null_entry);
8ed67789
DL
5165 kfree(net->ipv6.ip6_null_entry);
5166#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5167 kfree(net->ipv6.ip6_prohibit_entry);
5168 kfree(net->ipv6.ip6_blk_hole_entry);
5169#endif
41bb78b4 5170 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
5171}
5172
d189634e
TG
5173static int __net_init ip6_route_net_init_late(struct net *net)
5174{
5175#ifdef CONFIG_PROC_FS
d4beaa66 5176 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
d6444062 5177 proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
5178#endif
5179 return 0;
5180}
5181
5182static void __net_exit ip6_route_net_exit_late(struct net *net)
5183{
5184#ifdef CONFIG_PROC_FS
ece31ffd
G
5185 remove_proc_entry("ipv6_route", net->proc_net);
5186 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
5187#endif
5188}
5189
cdb18761
DL
5190static struct pernet_operations ip6_route_net_ops = {
5191 .init = ip6_route_net_init,
5192 .exit = ip6_route_net_exit,
5193};
5194
c3426b47
DM
5195static int __net_init ipv6_inetpeer_init(struct net *net)
5196{
5197 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5198
5199 if (!bp)
5200 return -ENOMEM;
5201 inet_peer_base_init(bp);
5202 net->ipv6.peers = bp;
5203 return 0;
5204}
5205
5206static void __net_exit ipv6_inetpeer_exit(struct net *net)
5207{
5208 struct inet_peer_base *bp = net->ipv6.peers;
5209
5210 net->ipv6.peers = NULL;
56a6b248 5211 inetpeer_invalidate_tree(bp);
c3426b47
DM
5212 kfree(bp);
5213}
5214
2b823f72 5215static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
5216 .init = ipv6_inetpeer_init,
5217 .exit = ipv6_inetpeer_exit,
5218};
5219
d189634e
TG
5220static struct pernet_operations ip6_route_net_late_ops = {
5221 .init = ip6_route_net_init_late,
5222 .exit = ip6_route_net_exit_late,
5223};
5224
8ed67789
DL
5225static struct notifier_block ip6_route_dev_notifier = {
5226 .notifier_call = ip6_route_dev_notify,
242d3a49 5227 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5228};
5229
2f460933
WC
5230void __init ip6_route_init_special_entries(void)
5231{
5232 /* Registering of the loopback is done before this portion of code,
5233 * the loopback reference in rt6_info will not be taken, do it
5234 * manually for init_net */
421842ed 5235 init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
2f460933
WC
5236 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5237 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5238 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5239 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5240 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5241 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5242 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5243 #endif
5244}
5245
433d49c3 5246int __init ip6_route_init(void)
1da177e4 5247{
433d49c3 5248 int ret;
8d0b94af 5249 int cpu;
433d49c3 5250
9a7ec3a9
DL
5251 ret = -ENOMEM;
5252 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5253 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5254 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5255 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5256 goto out;
14e50e57 5257
fc66f95c 5258 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5259 if (ret)
bdb3289f 5260 goto out_kmem_cache;
bdb3289f 5261
c3426b47
DM
5262 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5263 if (ret)
e8803b6c 5264 goto out_dst_entries;
2a0c451a 5265
7e52b33b
DM
5266 ret = register_pernet_subsys(&ip6_route_net_ops);
5267 if (ret)
5268 goto out_register_inetpeer;
c3426b47 5269
5dc121e9
AE
5270 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5271
e8803b6c 5272 ret = fib6_init();
433d49c3 5273 if (ret)
8ed67789 5274 goto out_register_subsys;
433d49c3 5275
433d49c3
DL
5276 ret = xfrm6_init();
5277 if (ret)
e8803b6c 5278 goto out_fib6_init;
c35b7e72 5279
433d49c3
DL
5280 ret = fib6_rules_init();
5281 if (ret)
5282 goto xfrm6_init;
7e5449c2 5283
d189634e
TG
5284 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5285 if (ret)
5286 goto fib6_rules_init;
5287
16feebcf
FW
5288 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5289 inet6_rtm_newroute, NULL, 0);
5290 if (ret < 0)
5291 goto out_register_late_subsys;
5292
5293 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5294 inet6_rtm_delroute, NULL, 0);
5295 if (ret < 0)
5296 goto out_register_late_subsys;
5297
5298 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5299 inet6_rtm_getroute, NULL,
5300 RTNL_FLAG_DOIT_UNLOCKED);
5301 if (ret < 0)
d189634e 5302 goto out_register_late_subsys;
c127ea2c 5303
8ed67789 5304 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5305 if (ret)
d189634e 5306 goto out_register_late_subsys;
8ed67789 5307
8d0b94af
MKL
5308 for_each_possible_cpu(cpu) {
5309 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5310
5311 INIT_LIST_HEAD(&ul->head);
5312 spin_lock_init(&ul->lock);
5313 }
5314
433d49c3
DL
5315out:
5316 return ret;
5317
d189634e 5318out_register_late_subsys:
16feebcf 5319 rtnl_unregister_all(PF_INET6);
d189634e 5320 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5321fib6_rules_init:
433d49c3
DL
5322 fib6_rules_cleanup();
5323xfrm6_init:
433d49c3 5324 xfrm6_fini();
2a0c451a
TG
5325out_fib6_init:
5326 fib6_gc_cleanup();
8ed67789
DL
5327out_register_subsys:
5328 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5329out_register_inetpeer:
5330 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5331out_dst_entries:
5332 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5333out_kmem_cache:
f2fc6a54 5334 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5335 goto out;
1da177e4
LT
5336}
5337
5338void ip6_route_cleanup(void)
5339{
8ed67789 5340 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5341 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5342 fib6_rules_cleanup();
1da177e4 5343 xfrm6_fini();
1da177e4 5344 fib6_gc_cleanup();
c3426b47 5345 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5346 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5347 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5348 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5349}