]> git.ipfire.org Git - thirdparty/kernel/stable.git/blob - net/ipv4/ip_tunnel.c
NFS4: Only set creation opendata if O_CREAT
[thirdparty/kernel/stable.git] / net / ipv4 / ip_tunnel.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (c) 2013 Nicira, Inc.
4 */
5
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7
8 #include <linux/capability.h>
9 #include <linux/module.h>
10 #include <linux/types.h>
11 #include <linux/kernel.h>
12 #include <linux/slab.h>
13 #include <linux/uaccess.h>
14 #include <linux/skbuff.h>
15 #include <linux/netdevice.h>
16 #include <linux/in.h>
17 #include <linux/tcp.h>
18 #include <linux/udp.h>
19 #include <linux/if_arp.h>
20 #include <linux/init.h>
21 #include <linux/in6.h>
22 #include <linux/inetdevice.h>
23 #include <linux/igmp.h>
24 #include <linux/netfilter_ipv4.h>
25 #include <linux/etherdevice.h>
26 #include <linux/if_ether.h>
27 #include <linux/if_vlan.h>
28 #include <linux/rculist.h>
29 #include <linux/err.h>
30
31 #include <net/sock.h>
32 #include <net/ip.h>
33 #include <net/icmp.h>
34 #include <net/protocol.h>
35 #include <net/ip_tunnels.h>
36 #include <net/arp.h>
37 #include <net/checksum.h>
38 #include <net/dsfield.h>
39 #include <net/inet_ecn.h>
40 #include <net/xfrm.h>
41 #include <net/net_namespace.h>
42 #include <net/netns/generic.h>
43 #include <net/rtnetlink.h>
44 #include <net/udp.h>
45 #include <net/dst_metadata.h>
46
47 #if IS_ENABLED(CONFIG_IPV6)
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #endif
52
53 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
54 {
55 return hash_32((__force u32)key ^ (__force u32)remote,
56 IP_TNL_HASH_BITS);
57 }
58
59 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
60 __be16 flags, __be32 key)
61 {
62 if (p->i_flags & TUNNEL_KEY) {
63 if (flags & TUNNEL_KEY)
64 return key == p->i_key;
65 else
66 /* key expected, none present */
67 return false;
68 } else
69 return !(flags & TUNNEL_KEY);
70 }
71
72 /* Fallback tunnel: no source, no destination, no key, no options
73
74 Tunnel hash table:
75 We require exact key match i.e. if a key is present in packet
76 it will match only tunnel with the same key; if it is not present,
77 it will match only keyless tunnel.
78
79 All keyless packets, if they do not match a configured keyless tunnel,
80 will match the fallback tunnel.
81 Given src, dst and key, find the appropriate tunnel for input.
82 */
/* Find the tunnel that matches the given outer addresses, key flags and
 * key. @link is compared with each matching tunnel's parms.link.
 *
 * The hash buckets are scanned in four passes, from the most to the
 * least specific match. In every pass only tunnels whose device is up
 * are considered. A match whose parms.link equals @link is returned
 * immediately; any other match is only remembered as a candidate and
 * returned once all passes are done. When there is no candidate, the
 * collect_md tunnel and then the fallback device are tried (each only
 * if its device is up). Returns NULL when nothing is found.
 */
83 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
84 int link, __be16 flags,
85 __be32 remote, __be32 local,
86 __be32 key)
87 {
88 unsigned int hash;
89 struct ip_tunnel *t, *cand = NULL;
90 struct hlist_head *head;
91
92 hash = ip_tunnel_hash(key, remote);
93 head = &itn->tunnels[hash];
94
/* Pass 1: both local and remote address match exactly. */
95 hlist_for_each_entry_rcu(t, head, hash_node) {
96 if (local != t->parms.iph.saddr ||
97 remote != t->parms.iph.daddr ||
98 !(t->dev->flags & IFF_UP))
99 continue;
100
101 if (!ip_tunnel_key_match(&t->parms, flags, key))
102 continue;
103
104 if (t->parms.link == link)
105 return t;
106 else
/* unlike the later passes, this one overwrites an earlier candidate */
107 cand = t;
108 }
109
/* Pass 2: remote address matches, tunnel has no local address set. */
110 hlist_for_each_entry_rcu(t, head, hash_node) {
111 if (remote != t->parms.iph.daddr ||
112 t->parms.iph.saddr != 0 ||
113 !(t->dev->flags & IFF_UP))
114 continue;
115
116 if (!ip_tunnel_key_match(&t->parms, flags, key))
117 continue;
118
119 if (t->parms.link == link)
120 return t;
121 else if (!cand)
122 cand = t;
123 }
124
/* The remaining passes use the bucket hashed with a zero remote. */
125 hash = ip_tunnel_hash(key, 0);
126 head = &itn->tunnels[hash];
127
/* Pass 3: tunnel has our local address and no remote address, or its
 * remote address equals a multicast @local.
 */
128 hlist_for_each_entry_rcu(t, head, hash_node) {
129 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
130 (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
131 continue;
132
133 if (!(t->dev->flags & IFF_UP))
134 continue;
135
136 if (!ip_tunnel_key_match(&t->parms, flags, key))
137 continue;
138
139 if (t->parms.link == link)
140 return t;
141 else if (!cand)
142 cand = t;
143 }
144
/* Pass 4 compares the input key only, so it is skipped for TUNNEL_NO_KEY. */
145 if (flags & TUNNEL_NO_KEY)
146 goto skip_key_lookup;
147
/* Pass 4: tunnel with neither address set whose input key equals @key. */
148 hlist_for_each_entry_rcu(t, head, hash_node) {
149 if (t->parms.i_key != key ||
150 t->parms.iph.saddr != 0 ||
151 t->parms.iph.daddr != 0 ||
152 !(t->dev->flags & IFF_UP))
153 continue;
154
155 if (t->parms.link == link)
156 return t;
157 else if (!cand)
158 cand = t;
159 }
160
161 skip_key_lookup:
162 if (cand)
163 return cand;
164
/* No configured tunnel matched: try the collect_md tunnel ... */
165 t = rcu_dereference(itn->collect_md_tun);
166 if (t && t->dev->flags & IFF_UP)
167 return t;
168
/* ... and finally the fallback device, if there is one and it is up. */
169 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
170 return netdev_priv(itn->fb_tunnel_dev);
171
172 return NULL;
173 }
174 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
175
176 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
177 struct ip_tunnel_parm *parms)
178 {
179 unsigned int h;
180 __be32 remote;
181 __be32 i_key = parms->i_key;
182
183 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
184 remote = parms->iph.daddr;
185 else
186 remote = 0;
187
188 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
189 i_key = 0;
190
191 h = ip_tunnel_hash(i_key, remote);
192 return &itn->tunnels[h];
193 }
194
195 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
196 {
197 struct hlist_head *head = ip_bucket(itn, &t->parms);
198
199 if (t->collect_md)
200 rcu_assign_pointer(itn->collect_md_tun, t);
201 hlist_add_head_rcu(&t->hash_node, head);
202 }
203
204 static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
205 {
206 if (t->collect_md)
207 rcu_assign_pointer(itn->collect_md_tun, NULL);
208 hlist_del_init_rcu(&t->hash_node);
209 }
210
211 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
212 struct ip_tunnel_parm *parms,
213 int type)
214 {
215 __be32 remote = parms->iph.daddr;
216 __be32 local = parms->iph.saddr;
217 __be32 key = parms->i_key;
218 __be16 flags = parms->i_flags;
219 int link = parms->link;
220 struct ip_tunnel *t = NULL;
221 struct hlist_head *head = ip_bucket(itn, parms);
222
223 hlist_for_each_entry_rcu(t, head, hash_node) {
224 if (local == t->parms.iph.saddr &&
225 remote == t->parms.iph.daddr &&
226 link == t->parms.link &&
227 type == t->dev->type &&
228 ip_tunnel_key_match(&t->parms, flags, key))
229 break;
230 }
231 return t;
232 }
233
234 static struct net_device *__ip_tunnel_create(struct net *net,
235 const struct rtnl_link_ops *ops,
236 struct ip_tunnel_parm *parms)
237 {
238 int err;
239 struct ip_tunnel *tunnel;
240 struct net_device *dev;
241 char name[IFNAMSIZ];
242
243 err = -E2BIG;
244 if (parms->name[0]) {
245 if (!dev_valid_name(parms->name))
246 goto failed;
247 strlcpy(name, parms->name, IFNAMSIZ);
248 } else {
249 if (strlen(ops->kind) > (IFNAMSIZ - 3))
250 goto failed;
251 strcpy(name, ops->kind);
252 strcat(name, "%d");
253 }
254
255 ASSERT_RTNL();
256 dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
257 if (!dev) {
258 err = -ENOMEM;
259 goto failed;
260 }
261 dev_net_set(dev, net);
262
263 dev->rtnl_link_ops = ops;
264
265 tunnel = netdev_priv(dev);
266 tunnel->parms = *parms;
267 tunnel->net = net;
268
269 err = register_netdevice(dev);
270 if (err)
271 goto failed_free;
272
273 return dev;
274
275 failed_free:
276 free_netdev(dev);
277 failed:
278 return ERR_PTR(err);
279 }
280
281 static int ip_tunnel_bind_dev(struct net_device *dev)
282 {
283 struct net_device *tdev = NULL;
284 struct ip_tunnel *tunnel = netdev_priv(dev);
285 const struct iphdr *iph;
286 int hlen = LL_MAX_HEADER;
287 int mtu = ETH_DATA_LEN;
288 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
289
290 iph = &tunnel->parms.iph;
291
292 /* Guess output device to choose reasonable mtu and needed_headroom */
293 if (iph->daddr) {
294 struct flowi4 fl4;
295 struct rtable *rt;
296
297 ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
298 iph->saddr, tunnel->parms.o_key,
299 RT_TOS(iph->tos), tunnel->parms.link,
300 tunnel->fwmark, 0);
301 rt = ip_route_output_key(tunnel->net, &fl4);
302
303 if (!IS_ERR(rt)) {
304 tdev = rt->dst.dev;
305 ip_rt_put(rt);
306 }
307 if (dev->type != ARPHRD_ETHER)
308 dev->flags |= IFF_POINTOPOINT;
309
310 dst_cache_reset(&tunnel->dst_cache);
311 }
312
313 if (!tdev && tunnel->parms.link)
314 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
315
316 if (tdev) {
317 hlen = tdev->hard_header_len + tdev->needed_headroom;
318 mtu = min(tdev->mtu, IP_MAX_MTU);
319 }
320
321 dev->needed_headroom = t_hlen + hlen;
322 mtu -= (dev->hard_header_len + t_hlen);
323
324 if (mtu < IPV4_MIN_MTU)
325 mtu = IPV4_MIN_MTU;
326
327 return mtu;
328 }
329
330 static struct ip_tunnel *ip_tunnel_create(struct net *net,
331 struct ip_tunnel_net *itn,
332 struct ip_tunnel_parm *parms)
333 {
334 struct ip_tunnel *nt;
335 struct net_device *dev;
336 int t_hlen;
337 int mtu;
338 int err;
339
340 dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
341 if (IS_ERR(dev))
342 return ERR_CAST(dev);
343
344 mtu = ip_tunnel_bind_dev(dev);
345 err = dev_set_mtu(dev, mtu);
346 if (err)
347 goto err_dev_set_mtu;
348
349 nt = netdev_priv(dev);
350 t_hlen = nt->hlen + sizeof(struct iphdr);
351 dev->min_mtu = ETH_MIN_MTU;
352 dev->max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;
353 ip_tunnel_add(itn, nt);
354 return nt;
355
356 err_dev_set_mtu:
357 unregister_netdevice(dev);
358 return ERR_PTR(err);
359 }
360
/* Receive a packet on @tunnel.
 *
 * The packet's tunnel flags in @tpi are checked against the tunnel's
 * checksum and sequence-number settings, ECN is decapsulated, the
 * per-cpu rx counters are updated and the skb is passed on through the
 * tunnel's GRO cells. If @tun_dst is given it becomes the skb's dst.
 *
 * Always returns 0. On a drop the skb is freed and @tun_dst, if any, is
 * released.
 */
361 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
362 const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
363 bool log_ecn_error)
364 {
365 struct pcpu_sw_netstats *tstats;
366 const struct iphdr *iph = ip_hdr(skb);
367 int err;
368
369 #ifdef CONFIG_NET_IPGRE_BROADCAST
370 if (ipv4_is_multicast(iph->daddr)) {
371 tunnel->dev->stats.multicast++;
372 skb->pkt_type = PACKET_BROADCAST;
373 }
374 #endif
375
/* The checksum flag of the packet must agree with the tunnel's setting. */
376 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
377 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
378 tunnel->dev->stats.rx_crc_errors++;
379 tunnel->dev->stats.rx_errors++;
380 goto drop;
381 }
382
/* With sequencing enabled, drop packets that carry no sequence number
 * or one that lies behind the next expected value (signed 32-bit
 * difference); otherwise advance the expected value.
 */
383 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
384 if (!(tpi->flags&TUNNEL_SEQ) ||
385 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
386 tunnel->dev->stats.rx_fifo_errors++;
387 tunnel->dev->stats.rx_errors++;
388 goto drop;
389 }
390 tunnel->i_seqno = ntohl(tpi->seq) + 1;
391 }
392
393 skb_reset_network_header(skb);
394
/* Any non-zero result may be logged; only results above 1 drop the packet. */
395 err = IP_ECN_decapsulate(iph, skb);
396 if (unlikely(err)) {
397 if (log_ecn_error)
398 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
399 &iph->saddr, iph->tos);
400 if (err > 1) {
401 ++tunnel->dev->stats.rx_frame_errors;
402 ++tunnel->dev->stats.rx_errors;
403 goto drop;
404 }
405 }
406
407 tstats = this_cpu_ptr(tunnel->dev->tstats);
408 u64_stats_update_begin(&tstats->syncp);
409 tstats->rx_packets++;
410 tstats->rx_bytes += skb->len;
411 u64_stats_update_end(&tstats->syncp);
412
/* The second argument is true when the tunnel's net differs from the device's net. */
413 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
414
415 if (tunnel->dev->type == ARPHRD_ETHER) {
416 skb->protocol = eth_type_trans(skb, tunnel->dev);
417 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
418 } else {
419 skb->dev = tunnel->dev;
420 }
421
422 if (tun_dst)
423 skb_dst_set(skb, (struct dst_entry *)tun_dst);
424
425 gro_cells_receive(&tunnel->gro_cells, skb);
426 return 0;
427
428 drop:
429 if (tun_dst)
430 dst_release((struct dst_entry *)tun_dst);
431 kfree_skb(skb);
432 return 0;
433 }
434 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
435
436 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
437 unsigned int num)
438 {
439 if (num >= MAX_IPTUN_ENCAP_OPS)
440 return -ERANGE;
441
442 return !cmpxchg((const struct ip_tunnel_encap_ops **)
443 &iptun_encaps[num],
444 NULL, ops) ? 0 : -1;
445 }
446 EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
447
448 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
449 unsigned int num)
450 {
451 int ret;
452
453 if (num >= MAX_IPTUN_ENCAP_OPS)
454 return -ERANGE;
455
456 ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
457 &iptun_encaps[num],
458 ops, NULL) == ops) ? 0 : -1;
459
460 synchronize_net();
461
462 return ret;
463 }
464 EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
465
466 int ip_tunnel_encap_setup(struct ip_tunnel *t,
467 struct ip_tunnel_encap *ipencap)
468 {
469 int hlen;
470
471 memset(&t->encap, 0, sizeof(t->encap));
472
473 hlen = ip_encap_hlen(ipencap);
474 if (hlen < 0)
475 return hlen;
476
477 t->encap.type = ipencap->type;
478 t->encap.sport = ipencap->sport;
479 t->encap.dport = ipencap->dport;
480 t->encap.flags = ipencap->flags;
481
482 t->encap_hlen = hlen;
483 t->hlen = t->encap_hlen + t->tun_hlen;
484
485 return 0;
486 }
487 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
488
/* Work out the MTU that applies to @skb on this tunnel, propagate it to
 * the skb's dst and reject packets that are too big.
 *
 * @df is the DF setting of the outer header, @inner_iph the inner
 * header, @md tells whether the caller runs in metadata mode; only then
 * are @tunnel_hlen and @dst taken from the caller, otherwise the
 * tunnel's own header length and remote address are used.
 *
 * Returns 0 when the packet may be sent, or -E2BIG after an ICMP
 * "fragmentation needed" / ICMPv6 "packet too big" error has been sent.
 */
489 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
490 struct rtable *rt, __be16 df,
491 const struct iphdr *inner_iph,
492 int tunnel_hlen, __be32 dst, bool md)
493 {
494 struct ip_tunnel *tunnel = netdev_priv(dev);
495 int pkt_size;
496 int mtu;
497
498 tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
499 pkt_size = skb->len - tunnel_hlen - dev->hard_header_len;
500
/* With DF set the outer packet must fit the route MTU, so the inner MTU
 * is the route MTU minus all headers added here; without DF the MTU of
 * the skb's own dst (or of the device) is used.
 */
501 if (df)
502 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
503 - sizeof(struct iphdr) - tunnel_hlen;
504 else
505 mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
506
507 if (skb_valid_dst(skb))
508 skb_dst_update_pmtu(skb, mtu);
509
510 if (skb->protocol == htons(ETH_P_IP)) {
/* Non-GSO IPv4 packet with DF set that does not fit: report it. */
511 if (!skb_is_gso(skb) &&
512 (inner_iph->frag_off & htons(IP_DF)) &&
513 mtu < pkt_size) {
514 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
515 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
516 return -E2BIG;
517 }
518 }
519 #if IS_ENABLED(CONFIG_IPV6)
520 else if (skb->protocol == htons(ETH_P_IPV6)) {
521 struct rt6_info *rt6;
522 __be32 daddr;
523
524 rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
525 NULL;
526 daddr = md ? dst : tunnel->parms.iph.daddr;
527
/* Lower the MTU metric of the IPv6 route when the tunnel MTU is
 * smaller, but only for a non-multicast remote or a /128 route.
 */
528 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
529 mtu >= IPV6_MIN_MTU) {
530 if ((daddr && !ipv4_is_multicast(daddr)) ||
531 rt6->rt6i_dst.plen == 128) {
532 rt6->rt6i_flags |= RTF_MODIFIED;
533 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
534 }
535 }
536
537 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
538 mtu < pkt_size) {
539 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
540 return -E2BIG;
541 }
542 }
543 #endif
544 return 0;
545 }
546
/* Transmit @skb on a metadata-based tunnel device.
 *
 * Addresses, key, TOS, TTL and the DF flag of the outer header are taken
 * from the tunnel metadata attached to the skb. @proto is the outer IP
 * protocol and @tunnel_hlen the tunnel header length handed on to the
 * PMTU check.
 *
 * The skb is always consumed: it is either passed to iptunnel_xmit() or
 * freed, in which case tx_errors or tx_dropped is incremented.
 */
547 void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
548 u8 proto, int tunnel_hlen)
549 {
550 struct ip_tunnel *tunnel = netdev_priv(dev);
551 u32 headroom = sizeof(struct iphdr);
552 struct ip_tunnel_info *tun_info;
553 const struct ip_tunnel_key *key;
554 const struct iphdr *inner_iph;
555 struct rtable *rt = NULL;
556 struct flowi4 fl4;
557 __be16 df = 0;
558 u8 tos, ttl;
559 bool use_cache;
560
/* TX metadata for IPv4 is mandatory on this path. */
561 tun_info = skb_tunnel_info(skb);
562 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
563 ip_tunnel_info_af(tun_info) != AF_INET))
564 goto tx_error;
565 key = &tun_info->key;
566 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
567 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
/* A TOS of exactly 1 in the metadata means: take it from the inner header. */
568 tos = key->tos;
569 if (tos == 1) {
570 if (skb->protocol == htons(ETH_P_IP))
571 tos = inner_iph->tos;
572 else if (skb->protocol == htons(ETH_P_IPV6))
573 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
574 }
575 ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
576 tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
577 0, skb->mark, skb_get_hash(skb));
/* A tunnel with an encapsulation type configured is rejected here. */
578 if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
579 goto tx_error;
580
/* Use the route cached in the metadata when allowed, else look one up. */
581 use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
582 if (use_cache)
583 rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
584 if (!rt) {
585 rt = ip_route_output_key(tunnel->net, &fl4);
586 if (IS_ERR(rt)) {
587 dev->stats.tx_carrier_errors++;
588 goto tx_error;
589 }
590 if (use_cache)
591 dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
592 fl4.saddr);
593 }
/* A route that leads back to the tunnel device itself is refused. */
594 if (rt->dst.dev == dev) {
595 ip_rt_put(rt);
596 dev->stats.collisions++;
597 goto tx_error;
598 }
599
600 if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
601 df = htons(IP_DF);
602 if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
603 key->u.ipv4.dst, true)) {
604 ip_rt_put(rt);
605 goto tx_error;
606 }
607
608 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
/* A TTL of 0 in the metadata means: inherit it from the inner header,
 * or use the route's hop limit for other protocols.
 */
609 ttl = key->ttl;
610 if (ttl == 0) {
611 if (skb->protocol == htons(ETH_P_IP))
612 ttl = inner_iph->ttl;
613 else if (skb->protocol == htons(ETH_P_IPV6))
614 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
615 else
616 ttl = ip4_dst_hoplimit(&rt->dst);
617 }
618
/* Without an explicit DF request, copy DF from an inner IPv4 header. */
619 if (!df && skb->protocol == htons(ETH_P_IP))
620 df = inner_iph->frag_off & htons(IP_DF);
621
/* needed_headroom only ever grows to the largest value seen so far. */
622 headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
623 if (headroom > dev->needed_headroom)
624 dev->needed_headroom = headroom;
625
626 if (skb_cow_head(skb, dev->needed_headroom)) {
627 ip_rt_put(rt);
628 goto tx_dropped;
629 }
630 iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
631 df, !net_eq(tunnel->net, dev_net(dev)));
632 return;
633 tx_error:
634 dev->stats.tx_errors++;
635 goto kfree;
636 tx_dropped:
637 dev->stats.tx_dropped++;
638 kfree:
639 kfree_skb(skb);
640 }
641 EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
642
/* Transmit @skb on tunnel device @dev.
 *
 * @tnl_params is the template for the outer IPv4 header (addresses, TOS,
 * TTL, fragment flags) and @protocol the outer IP protocol. When the
 * template has no destination address, the destination is derived per
 * packet (see below).
 *
 * The skb is always consumed: it is either passed to iptunnel_xmit() or
 * freed, in which case tx_errors or tx_dropped is incremented.
 */
643 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
644 const struct iphdr *tnl_params, u8 protocol)
645 {
646 struct ip_tunnel *tunnel = netdev_priv(dev);
647 struct ip_tunnel_info *tun_info = NULL;
648 const struct iphdr *inner_iph;
649 unsigned int max_headroom; /* The extra header space needed */
650 struct rtable *rt = NULL; /* Route to the other host */
651 bool use_cache = false;
652 struct flowi4 fl4;
653 bool md = false;
654 bool connected;
655 u8 tos, ttl;
656 __be32 dst;
657 __be16 df;
658
659 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
/* "connected" decides below whether a cached route may be used. */
660 connected = (tunnel->parms.iph.daddr != 0);
661
662 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
663
664 dst = tnl_params->daddr;
665 if (dst == 0) {
666 /* NBMA tunnel */
667
668 if (!skb_dst(skb)) {
669 dev->stats.tx_fifo_errors++;
670 goto tx_error;
671 }
672
/* Destination sources, in order: IPv4 TX tunnel metadata on the skb,
 * the next hop of the skb's IPv4 route, or (IPv6) the IPv4 address
 * embedded in a v4-compatible neighbour/destination address.
 */
673 tun_info = skb_tunnel_info(skb);
674 if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
675 ip_tunnel_info_af(tun_info) == AF_INET &&
676 tun_info->key.u.ipv4.dst) {
677 dst = tun_info->key.u.ipv4.dst;
678 md = true;
679 connected = true;
680 }
681 else if (skb->protocol == htons(ETH_P_IP)) {
682 rt = skb_rtable(skb);
683 dst = rt_nexthop(rt, inner_iph->daddr);
684 }
685 #if IS_ENABLED(CONFIG_IPV6)
686 else if (skb->protocol == htons(ETH_P_IPV6)) {
687 const struct in6_addr *addr6;
688 struct neighbour *neigh;
689 bool do_tx_error_icmp;
690 int addr_type;
691
692 neigh = dst_neigh_lookup(skb_dst(skb),
693 &ipv6_hdr(skb)->daddr);
694 if (!neigh)
695 goto tx_error;
696
697 addr6 = (const struct in6_addr *)&neigh->primary_key;
698 addr_type = ipv6_addr_type(addr6);
699
/* An unspecified neighbour key: fall back to the packet's destination. */
700 if (addr_type == IPV6_ADDR_ANY) {
701 addr6 = &ipv6_hdr(skb)->daddr;
702 addr_type = ipv6_addr_type(addr6);
703 }
704
705 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
706 do_tx_error_icmp = true;
707 else {
708 do_tx_error_icmp = false;
709 dst = addr6->s6_addr32[3];
710 }
/* The neighbour reference is dropped before the error exit is taken. */
711 neigh_release(neigh);
712 if (do_tx_error_icmp)
713 goto tx_error_icmp;
714 }
715 #endif
716 else
717 goto tx_error;
718
719 if (!md)
720 connected = false;
721 }
722
/* Bit 0 of the template TOS means: inherit the TOS from the inner
 * header; "connected" is cleared in that case.
 */
723 tos = tnl_params->tos;
724 if (tos & 0x1) {
725 tos &= ~0x1;
726 if (skb->protocol == htons(ETH_P_IP)) {
727 tos = inner_iph->tos;
728 connected = false;
729 } else if (skb->protocol == htons(ETH_P_IPV6)) {
730 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
731 connected = false;
732 }
733 }
734
735 ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
736 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
737 tunnel->fwmark, skb_get_hash(skb));
738
739 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
740 goto tx_error;
741
/* Route cache selection: the metadata's cache in metadata mode, the
 * tunnel's own cache for a connected tunnel, none otherwise. This also
 * discards the rt taken from the skb in the NBMA IPv4 case.
 */
742 if (connected && md) {
743 use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
744 if (use_cache)
745 rt = dst_cache_get_ip4(&tun_info->dst_cache,
746 &fl4.saddr);
747 } else {
748 rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
749 &fl4.saddr) : NULL;
750 }
751
752 if (!rt) {
753 rt = ip_route_output_key(tunnel->net, &fl4);
754
755 if (IS_ERR(rt)) {
756 dev->stats.tx_carrier_errors++;
757 goto tx_error;
758 }
759 if (use_cache)
760 dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
761 fl4.saddr);
762 else if (!md && connected)
763 dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
764 fl4.saddr);
765 }
766
/* A route that leads back to the tunnel device itself is refused. */
767 if (rt->dst.dev == dev) {
768 ip_rt_put(rt);
769 dev->stats.collisions++;
770 goto tx_error;
771 }
772
773 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph,
774 0, 0, false)) {
775 ip_rt_put(rt);
776 goto tx_error;
777 }
778
/* While err_count is positive and err_time is less than
 * IPTUNNEL_ERR_TIMEO ago, consume one count and signal a link failure
 * for this skb; after the timeout the count is reset.
 * NOTE(review): err_count/err_time are set outside this file - presumably
 * by the tunnel's error handler; confirm.
 */
779 if (tunnel->err_count > 0) {
780 if (time_before(jiffies,
781 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
782 tunnel->err_count--;
783
784 dst_link_failure(skb);
785 } else
786 tunnel->err_count = 0;
787 }
788
789 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
/* A template TTL of 0 means: inherit it from the inner header, or use
 * the route's hop limit for other protocols.
 */
790 ttl = tnl_params->ttl;
791 if (ttl == 0) {
792 if (skb->protocol == htons(ETH_P_IP))
793 ttl = inner_iph->ttl;
794 #if IS_ENABLED(CONFIG_IPV6)
795 else if (skb->protocol == htons(ETH_P_IPV6))
796 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
797 #endif
798 else
799 ttl = ip4_dst_hoplimit(&rt->dst);
800 }
801
/* DF of an inner IPv4 header is added unless the tunnel ignores it. */
802 df = tnl_params->frag_off;
803 if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
804 df |= (inner_iph->frag_off&htons(IP_DF));
805
/* needed_headroom only ever grows to the largest value seen so far. */
806 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
807 + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
808 if (max_headroom > dev->needed_headroom)
809 dev->needed_headroom = max_headroom;
810
811 if (skb_cow_head(skb, dev->needed_headroom)) {
812 ip_rt_put(rt);
813 dev->stats.tx_dropped++;
814 kfree_skb(skb);
815 return;
816 }
817
818 iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
819 df, !net_eq(tunnel->net, dev_net(dev)));
820 return;
821
822 #if IS_ENABLED(CONFIG_IPV6)
823 tx_error_icmp:
824 dst_link_failure(skb);
825 #endif
826 tx_error:
827 dev->stats.tx_errors++;
828 kfree_skb(skb);
829 }
830 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
831
832 static void ip_tunnel_update(struct ip_tunnel_net *itn,
833 struct ip_tunnel *t,
834 struct net_device *dev,
835 struct ip_tunnel_parm *p,
836 bool set_mtu,
837 __u32 fwmark)
838 {
839 ip_tunnel_del(itn, t);
840 t->parms.iph.saddr = p->iph.saddr;
841 t->parms.iph.daddr = p->iph.daddr;
842 t->parms.i_key = p->i_key;
843 t->parms.o_key = p->o_key;
844 if (dev->type != ARPHRD_ETHER) {
845 memcpy(dev->dev_addr, &p->iph.saddr, 4);
846 memcpy(dev->broadcast, &p->iph.daddr, 4);
847 }
848 ip_tunnel_add(itn, t);
849
850 t->parms.iph.ttl = p->iph.ttl;
851 t->parms.iph.tos = p->iph.tos;
852 t->parms.iph.frag_off = p->iph.frag_off;
853
854 if (t->parms.link != p->link || t->fwmark != fwmark) {
855 int mtu;
856
857 t->parms.link = p->link;
858 t->fwmark = fwmark;
859 mtu = ip_tunnel_bind_dev(dev);
860 if (set_mtu)
861 dev->mtu = mtu;
862 }
863 dst_cache_reset(&t->dst_cache);
864 netdev_state_change(dev);
865 }
866
/* Handle the tunnel ioctls SIOCGETTUNNEL, SIOCADDTUNNEL, SIOCCHGTUNNEL
 * and SIOCDELTUNNEL issued on @dev.
 *
 * @p holds the parameters from the caller; for SIOCGETTUNNEL it is
 * overwritten with the parameters of the selected tunnel. Add/change
 * requests may also adjust @p in place (DF flag, keys).
 *
 * Returns 0 on success or a negative errno: -EPERM without
 * CAP_NET_ADMIN in the net's user namespace (or when asked to delete the
 * fallback tunnel), -EEXIST, -ENOENT, -EINVAL, or the error from tunnel
 * creation.
 */
867 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
868 {
869 int err = 0;
870 struct ip_tunnel *t = netdev_priv(dev);
871 struct net *net = t->net;
872 struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
873
874 switch (cmd) {
875 case SIOCGETTUNNEL:
/* On the fallback device, @p selects which tunnel to report; if none
 * matches (or on any other device) the device's own tunnel is reported.
 */
876 if (dev == itn->fb_tunnel_dev) {
877 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
878 if (!t)
879 t = netdev_priv(dev);
880 }
881 memcpy(p, &t->parms, sizeof(*p));
882 break;
883
884 case SIOCADDTUNNEL:
885 case SIOCCHGTUNNEL:
886 err = -EPERM;
887 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
888 goto done;
/* A fixed TTL forces DF on the outer header. */
889 if (p->iph.ttl)
890 p->iph.frag_off |= htons(IP_DF);
/* Except for VTI, keys without the matching TUNNEL_KEY flag are zeroed. */
891 if (!(p->i_flags & VTI_ISVTI)) {
892 if (!(p->i_flags & TUNNEL_KEY))
893 p->i_key = 0;
894 if (!(p->o_flags & TUNNEL_KEY))
895 p->o_key = 0;
896 }
897
898 t = ip_tunnel_find(itn, p, itn->type);
899
900 if (cmd == SIOCADDTUNNEL) {
901 if (!t) {
902 t = ip_tunnel_create(net, itn, p);
903 err = PTR_ERR_OR_ZERO(t);
904 break;
905 }
906
907 err = -EEXIST;
908 break;
909 }
/* Change request on a regular tunnel device: the new parameters must
 * not collide with a different device's tunnel. If nothing matches them
 * yet, the device's own tunnel is changed, provided the new remote
 * address keeps the device's point-to-point/broadcast character.
 */
910 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
911 if (t) {
912 if (t->dev != dev) {
913 err = -EEXIST;
914 break;
915 }
916 } else {
917 unsigned int nflags = 0;
918
919 if (ipv4_is_multicast(p->iph.daddr))
920 nflags = IFF_BROADCAST;
921 else if (p->iph.daddr)
922 nflags = IFF_POINTOPOINT;
923
924 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
925 err = -EINVAL;
926 break;
927 }
928
929 t = netdev_priv(dev);
930 }
931 }
932
/* On the fallback device t is whatever the lookup found, possibly NULL. */
933 if (t) {
934 err = 0;
935 ip_tunnel_update(itn, t, dev, p, true, 0);
936 } else {
937 err = -ENOENT;
938 }
939 break;
940
941 case SIOCDELTUNNEL:
942 err = -EPERM;
943 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
944 goto done;
945
/* On the fallback device, @p selects the tunnel to delete; the fallback
 * tunnel itself cannot be deleted this way.
 */
946 if (dev == itn->fb_tunnel_dev) {
947 err = -ENOENT;
948 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
949 if (!t)
950 goto done;
951 err = -EPERM;
952 if (t == netdev_priv(itn->fb_tunnel_dev))
953 goto done;
954 dev = t->dev;
955 }
956 unregister_netdevice(dev);
957 err = 0;
958 break;
959
960 default:
961 err = -EINVAL;
962 }
963
964 done:
965 return err;
966 }
967 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
968
969 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
970 {
971 struct ip_tunnel *tunnel = netdev_priv(dev);
972 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
973 int max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;
974
975 if (new_mtu < ETH_MIN_MTU)
976 return -EINVAL;
977
978 if (new_mtu > max_mtu) {
979 if (strict)
980 return -EINVAL;
981
982 new_mtu = max_mtu;
983 }
984
985 dev->mtu = new_mtu;
986 return 0;
987 }
988 EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
989
990 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
991 {
992 return __ip_tunnel_change_mtu(dev, new_mtu, true);
993 }
994 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
995
996 static void ip_tunnel_dev_free(struct net_device *dev)
997 {
998 struct ip_tunnel *tunnel = netdev_priv(dev);
999
1000 gro_cells_destroy(&tunnel->gro_cells);
1001 dst_cache_destroy(&tunnel->dst_cache);
1002 free_percpu(dev->tstats);
1003 }
1004
1005 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
1006 {
1007 struct ip_tunnel *tunnel = netdev_priv(dev);
1008 struct ip_tunnel_net *itn;
1009
1010 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
1011
1012 if (itn->fb_tunnel_dev != dev) {
1013 ip_tunnel_del(itn, netdev_priv(dev));
1014 unregister_netdevice_queue(dev, head);
1015 }
1016 }
1017 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
1018
1019 struct net *ip_tunnel_get_link_net(const struct net_device *dev)
1020 {
1021 struct ip_tunnel *tunnel = netdev_priv(dev);
1022
1023 return tunnel->net;
1024 }
1025 EXPORT_SYMBOL(ip_tunnel_get_link_net);
1026
1027 int ip_tunnel_get_iflink(const struct net_device *dev)
1028 {
1029 struct ip_tunnel *tunnel = netdev_priv(dev);
1030
1031 return tunnel->parms.link;
1032 }
1033 EXPORT_SYMBOL(ip_tunnel_get_iflink);
1034
1035 int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
1036 struct rtnl_link_ops *ops, char *devname)
1037 {
1038 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
1039 struct ip_tunnel_parm parms;
1040 unsigned int i;
1041
1042 itn->rtnl_link_ops = ops;
1043 for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1044 INIT_HLIST_HEAD(&itn->tunnels[i]);
1045
1046 if (!ops || !net_has_fallback_tunnels(net)) {
1047 struct ip_tunnel_net *it_init_net;
1048
1049 it_init_net = net_generic(&init_net, ip_tnl_net_id);
1050 itn->type = it_init_net->type;
1051 itn->fb_tunnel_dev = NULL;
1052 return 0;
1053 }
1054
1055 memset(&parms, 0, sizeof(parms));
1056 if (devname)
1057 strlcpy(parms.name, devname, IFNAMSIZ);
1058
1059 rtnl_lock();
1060 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
1061 /* FB netdevice is special: we have one, and only one per netns.
1062 * Allowing to move it to another netns is clearly unsafe.
1063 */
1064 if (!IS_ERR(itn->fb_tunnel_dev)) {
1065 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1066 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1067 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1068 itn->type = itn->fb_tunnel_dev->type;
1069 }
1070 rtnl_unlock();
1071
1072 return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1073 }
1074 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1075
1076 static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
1077 struct list_head *head,
1078 struct rtnl_link_ops *ops)
1079 {
1080 struct net_device *dev, *aux;
1081 int h;
1082
1083 for_each_netdev_safe(net, dev, aux)
1084 if (dev->rtnl_link_ops == ops)
1085 unregister_netdevice_queue(dev, head);
1086
1087 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1088 struct ip_tunnel *t;
1089 struct hlist_node *n;
1090 struct hlist_head *thead = &itn->tunnels[h];
1091
1092 hlist_for_each_entry_safe(t, n, thead, hash_node)
1093 /* If dev is in the same netns, it has already
1094 * been added to the list by the previous loop.
1095 */
1096 if (!net_eq(dev_net(t->dev), net))
1097 unregister_netdevice_queue(t->dev, head);
1098 }
1099 }
1100
1101 void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
1102 struct rtnl_link_ops *ops)
1103 {
1104 struct ip_tunnel_net *itn;
1105 struct net *net;
1106 LIST_HEAD(list);
1107
1108 rtnl_lock();
1109 list_for_each_entry(net, net_list, exit_list) {
1110 itn = net_generic(net, id);
1111 ip_tunnel_destroy(net, itn, &list, ops);
1112 }
1113 unregister_netdevice_many(&list);
1114 rtnl_unlock();
1115 }
1116 EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
1117
1118 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1119 struct ip_tunnel_parm *p, __u32 fwmark)
1120 {
1121 struct ip_tunnel *nt;
1122 struct net *net = dev_net(dev);
1123 struct ip_tunnel_net *itn;
1124 int mtu;
1125 int err;
1126
1127 nt = netdev_priv(dev);
1128 itn = net_generic(net, nt->ip_tnl_net_id);
1129
1130 if (nt->collect_md) {
1131 if (rtnl_dereference(itn->collect_md_tun))
1132 return -EEXIST;
1133 } else {
1134 if (ip_tunnel_find(itn, p, dev->type))
1135 return -EEXIST;
1136 }
1137
1138 nt->net = net;
1139 nt->parms = *p;
1140 nt->fwmark = fwmark;
1141 err = register_netdevice(dev);
1142 if (err)
1143 goto err_register_netdevice;
1144
1145 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1146 eth_hw_addr_random(dev);
1147
1148 mtu = ip_tunnel_bind_dev(dev);
1149 if (tb[IFLA_MTU]) {
1150 unsigned int max = IP_MAX_MTU - dev->hard_header_len - nt->hlen;
1151
1152 mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU,
1153 (unsigned int)(max - sizeof(struct iphdr)));
1154 }
1155
1156 err = dev_set_mtu(dev, mtu);
1157 if (err)
1158 goto err_dev_set_mtu;
1159
1160 ip_tunnel_add(itn, nt);
1161 return 0;
1162
1163 err_dev_set_mtu:
1164 unregister_netdevice(dev);
1165 err_register_netdevice:
1166 return err;
1167 }
1168 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1169
1170 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1171 struct ip_tunnel_parm *p, __u32 fwmark)
1172 {
1173 struct ip_tunnel *t;
1174 struct ip_tunnel *tunnel = netdev_priv(dev);
1175 struct net *net = tunnel->net;
1176 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1177
1178 if (dev == itn->fb_tunnel_dev)
1179 return -EINVAL;
1180
1181 t = ip_tunnel_find(itn, p, dev->type);
1182
1183 if (t) {
1184 if (t->dev != dev)
1185 return -EEXIST;
1186 } else {
1187 t = tunnel;
1188
1189 if (dev->type != ARPHRD_ETHER) {
1190 unsigned int nflags = 0;
1191
1192 if (ipv4_is_multicast(p->iph.daddr))
1193 nflags = IFF_BROADCAST;
1194 else if (p->iph.daddr)
1195 nflags = IFF_POINTOPOINT;
1196
1197 if ((dev->flags ^ nflags) &
1198 (IFF_POINTOPOINT | IFF_BROADCAST))
1199 return -EINVAL;
1200 }
1201 }
1202
1203 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
1204 return 0;
1205 }
1206 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1207
1208 int ip_tunnel_init(struct net_device *dev)
1209 {
1210 struct ip_tunnel *tunnel = netdev_priv(dev);
1211 struct iphdr *iph = &tunnel->parms.iph;
1212 int err;
1213
1214 dev->needs_free_netdev = true;
1215 dev->priv_destructor = ip_tunnel_dev_free;
1216 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1217 if (!dev->tstats)
1218 return -ENOMEM;
1219
1220 err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1221 if (err) {
1222 free_percpu(dev->tstats);
1223 return err;
1224 }
1225
1226 err = gro_cells_init(&tunnel->gro_cells, dev);
1227 if (err) {
1228 dst_cache_destroy(&tunnel->dst_cache);
1229 free_percpu(dev->tstats);
1230 return err;
1231 }
1232
1233 tunnel->dev = dev;
1234 tunnel->net = dev_net(dev);
1235 strcpy(tunnel->parms.name, dev->name);
1236 iph->version = 4;
1237 iph->ihl = 5;
1238
1239 if (tunnel->collect_md) {
1240 dev->features |= NETIF_F_NETNS_LOCAL;
1241 netif_keep_dst(dev);
1242 }
1243 return 0;
1244 }
1245 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1246
1247 void ip_tunnel_uninit(struct net_device *dev)
1248 {
1249 struct ip_tunnel *tunnel = netdev_priv(dev);
1250 struct net *net = tunnel->net;
1251 struct ip_tunnel_net *itn;
1252
1253 itn = net_generic(net, tunnel->ip_tnl_net_id);
1254 /* fb_tunnel_dev will be unregisted in net-exit call. */
1255 if (itn->fb_tunnel_dev != dev)
1256 ip_tunnel_del(itn, netdev_priv(dev));
1257
1258 dst_cache_reset(&tunnel->dst_cache);
1259 }
1260 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1261
1262 /* Do least required initialization, rest of init is done in tunnel_init call */
1263 void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
1264 {
1265 struct ip_tunnel *tunnel = netdev_priv(dev);
1266 tunnel->ip_tnl_net_id = net_id;
1267 }
1268 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1269
1270 MODULE_LICENSE("GPL");