git.ipfire.org Git - thirdparty/linux.git/blob - net/netfilter/nf_conntrack_proto.c
mm/hotplug: treat CMA pages as unmovable
[thirdparty/linux.git] / net / netfilter / nf_conntrack_proto.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 #include <linux/types.h>
4 #include <linux/netfilter.h>
5 #include <linux/module.h>
6 #include <linux/slab.h>
7 #include <linux/mutex.h>
8 #include <linux/vmalloc.h>
9 #include <linux/stddef.h>
10 #include <linux/err.h>
11 #include <linux/percpu.h>
12 #include <linux/notifier.h>
13 #include <linux/kernel.h>
14 #include <linux/netdevice.h>
15
16 #include <net/netfilter/nf_conntrack.h>
17 #include <net/netfilter/nf_conntrack_l4proto.h>
18 #include <net/netfilter/nf_conntrack_core.h>
19 #include <net/netfilter/nf_log.h>
20
21 #include <linux/ip.h>
22 #include <linux/icmp.h>
23 #include <linux/sysctl.h>
24 #include <net/route.h>
25 #include <net/ip.h>
26
27 #include <linux/netfilter_ipv4.h>
28 #include <linux/netfilter_ipv6.h>
29 #include <linux/netfilter_ipv6/ip6_tables.h>
30 #include <net/netfilter/nf_conntrack_helper.h>
31 #include <net/netfilter/nf_conntrack_zones.h>
32 #include <net/netfilter/nf_conntrack_seqadj.h>
33 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
34 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
35 #include <net/netfilter/nf_nat_helper.h>
36 #include <net/netfilter/ipv4/nf_defrag_ipv4.h>
37 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
38
39 #include <linux/ipv6.h>
40 #include <linux/in6.h>
41 #include <net/ipv6.h>
42 #include <net/inet_frag.h>
43
/* Index handed to net_generic() below to look up a netns' struct nf_conntrack_net. */
extern unsigned int nf_conntrack_net_id;

/* Held around the users4/users6 counters and conntrack hook (un)registration. */
static DEFINE_MUTEX(nf_ct_proto_mutex);
47
48 #ifdef CONFIG_SYSCTL
49 __printf(5, 6)
50 void nf_l4proto_log_invalid(const struct sk_buff *skb,
51 struct net *net,
52 u16 pf, u8 protonum,
53 const char *fmt, ...)
54 {
55 struct va_format vaf;
56 va_list args;
57
58 if (net->ct.sysctl_log_invalid != protonum ||
59 net->ct.sysctl_log_invalid != IPPROTO_RAW)
60 return;
61
62 va_start(args, fmt);
63 vaf.fmt = fmt;
64 vaf.va = &args;
65
66 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
67 "nf_ct_proto_%d: %pV ", protonum, &vaf);
68 va_end(args);
69 }
70 EXPORT_SYMBOL_GPL(nf_l4proto_log_invalid);
71
72 __printf(3, 4)
73 void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
74 const struct nf_conn *ct,
75 const char *fmt, ...)
76 {
77 struct va_format vaf;
78 struct net *net;
79 va_list args;
80
81 net = nf_ct_net(ct);
82 if (likely(net->ct.sysctl_log_invalid == 0))
83 return;
84
85 va_start(args, fmt);
86 vaf.fmt = fmt;
87 vaf.va = &args;
88
89 nf_l4proto_log_invalid(skb, net, nf_ct_l3num(ct),
90 nf_ct_protonum(ct), "%pV", &vaf);
91 va_end(args);
92 }
93 EXPORT_SYMBOL_GPL(nf_ct_l4proto_log_invalid);
94 #endif
95
96 const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto)
97 {
98 switch (l4proto) {
99 case IPPROTO_UDP: return &nf_conntrack_l4proto_udp;
100 case IPPROTO_TCP: return &nf_conntrack_l4proto_tcp;
101 case IPPROTO_ICMP: return &nf_conntrack_l4proto_icmp;
102 #ifdef CONFIG_NF_CT_PROTO_DCCP
103 case IPPROTO_DCCP: return &nf_conntrack_l4proto_dccp;
104 #endif
105 #ifdef CONFIG_NF_CT_PROTO_SCTP
106 case IPPROTO_SCTP: return &nf_conntrack_l4proto_sctp;
107 #endif
108 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
109 case IPPROTO_UDPLITE: return &nf_conntrack_l4proto_udplite;
110 #endif
111 #ifdef CONFIG_NF_CT_PROTO_GRE
112 case IPPROTO_GRE: return &nf_conntrack_l4proto_gre;
113 #endif
114 #if IS_ENABLED(CONFIG_IPV6)
115 case IPPROTO_ICMPV6: return &nf_conntrack_l4proto_icmpv6;
116 #endif /* CONFIG_IPV6 */
117 }
118
119 return &nf_conntrack_l4proto_generic;
120 };
121 EXPORT_SYMBOL_GPL(nf_ct_l4proto_find);
122
123 static unsigned int nf_confirm(struct sk_buff *skb,
124 unsigned int protoff,
125 struct nf_conn *ct,
126 enum ip_conntrack_info ctinfo)
127 {
128 const struct nf_conn_help *help;
129
130 help = nfct_help(ct);
131 if (help) {
132 const struct nf_conntrack_helper *helper;
133 int ret;
134
135 /* rcu_read_lock()ed by nf_hook_thresh */
136 helper = rcu_dereference(help->helper);
137 if (helper) {
138 ret = helper->help(skb,
139 protoff,
140 ct, ctinfo);
141 if (ret != NF_ACCEPT)
142 return ret;
143 }
144 }
145
146 if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
147 !nf_is_loopback_packet(skb)) {
148 if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
149 NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
150 return NF_DROP;
151 }
152 }
153
154 /* We've seen it coming out the other side: confirm it */
155 return nf_conntrack_confirm(skb);
156 }
157
158 static unsigned int ipv4_confirm(void *priv,
159 struct sk_buff *skb,
160 const struct nf_hook_state *state)
161 {
162 enum ip_conntrack_info ctinfo;
163 struct nf_conn *ct;
164
165 ct = nf_ct_get(skb, &ctinfo);
166 if (!ct || ctinfo == IP_CT_RELATED_REPLY)
167 return nf_conntrack_confirm(skb);
168
169 return nf_confirm(skb,
170 skb_network_offset(skb) + ip_hdrlen(skb),
171 ct, ctinfo);
172 }
173
/* IPv4 PRE_ROUTING hook: hand the packet straight to nf_conntrack_in(); @priv is unused. */
static unsigned int ipv4_conntrack_in(void *priv, struct sk_buff *skb,
				      const struct nf_hook_state *state)
{
	return nf_conntrack_in(skb, state);
}
180
181 static unsigned int ipv4_conntrack_local(void *priv,
182 struct sk_buff *skb,
183 const struct nf_hook_state *state)
184 {
185 if (ip_is_fragment(ip_hdr(skb))) { /* IP_NODEFRAG setsockopt set */
186 enum ip_conntrack_info ctinfo;
187 struct nf_conn *tmpl;
188
189 tmpl = nf_ct_get(skb, &ctinfo);
190 if (tmpl && nf_ct_is_template(tmpl)) {
191 /* when skipping ct, clear templates to avoid fooling
192 * later targets/matches
193 */
194 skb->_nfct = 0;
195 nf_ct_put(tmpl);
196 }
197 return NF_ACCEPT;
198 }
199
200 return nf_conntrack_in(skb, state);
201 }
202
203 /* Connection tracking may drop packets, but never alters them, so
204 * make it the first hook.
205 */
206 static const struct nf_hook_ops ipv4_conntrack_ops[] = {
207 {
208 .hook = ipv4_conntrack_in,
209 .pf = NFPROTO_IPV4,
210 .hooknum = NF_INET_PRE_ROUTING,
211 .priority = NF_IP_PRI_CONNTRACK,
212 },
213 {
214 .hook = ipv4_conntrack_local,
215 .pf = NFPROTO_IPV4,
216 .hooknum = NF_INET_LOCAL_OUT,
217 .priority = NF_IP_PRI_CONNTRACK,
218 },
219 {
220 .hook = ipv4_confirm,
221 .pf = NFPROTO_IPV4,
222 .hooknum = NF_INET_POST_ROUTING,
223 .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
224 },
225 {
226 .hook = ipv4_confirm,
227 .pf = NFPROTO_IPV4,
228 .hooknum = NF_INET_LOCAL_IN,
229 .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
230 },
231 };
232
233 /* Fast function for those who don't want to parse /proc (and I don't
234 * blame them).
235 * Reversing the socket's dst/src point of view gives us the reply
236 * mapping.
237 */
238 static int
239 getorigdst(struct sock *sk, int optval, void __user *user, int *len)
240 {
241 const struct inet_sock *inet = inet_sk(sk);
242 const struct nf_conntrack_tuple_hash *h;
243 struct nf_conntrack_tuple tuple;
244
245 memset(&tuple, 0, sizeof(tuple));
246
247 lock_sock(sk);
248 tuple.src.u3.ip = inet->inet_rcv_saddr;
249 tuple.src.u.tcp.port = inet->inet_sport;
250 tuple.dst.u3.ip = inet->inet_daddr;
251 tuple.dst.u.tcp.port = inet->inet_dport;
252 tuple.src.l3num = PF_INET;
253 tuple.dst.protonum = sk->sk_protocol;
254 release_sock(sk);
255
256 /* We only do TCP and SCTP at the moment: is there a better way? */
257 if (tuple.dst.protonum != IPPROTO_TCP &&
258 tuple.dst.protonum != IPPROTO_SCTP) {
259 pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
260 return -ENOPROTOOPT;
261 }
262
263 if ((unsigned int)*len < sizeof(struct sockaddr_in)) {
264 pr_debug("SO_ORIGINAL_DST: len %d not %zu\n",
265 *len, sizeof(struct sockaddr_in));
266 return -EINVAL;
267 }
268
269 h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
270 if (h) {
271 struct sockaddr_in sin;
272 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
273
274 sin.sin_family = AF_INET;
275 sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
276 .tuple.dst.u.tcp.port;
277 sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
278 .tuple.dst.u3.ip;
279 memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
280
281 pr_debug("SO_ORIGINAL_DST: %pI4 %u\n",
282 &sin.sin_addr.s_addr, ntohs(sin.sin_port));
283 nf_ct_put(ct);
284 if (copy_to_user(user, &sin, sizeof(sin)) != 0)
285 return -EFAULT;
286 else
287 return 0;
288 }
289 pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n",
290 &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port),
291 &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port));
292 return -ENOENT;
293 }
294
295 static struct nf_sockopt_ops so_getorigdst = {
296 .pf = PF_INET,
297 .get_optmin = SO_ORIGINAL_DST,
298 .get_optmax = SO_ORIGINAL_DST + 1,
299 .get = getorigdst,
300 .owner = THIS_MODULE,
301 };
302
303 #if IS_ENABLED(CONFIG_IPV6)
304 static int
305 ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
306 {
307 struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
308 const struct ipv6_pinfo *inet6 = inet6_sk(sk);
309 const struct inet_sock *inet = inet_sk(sk);
310 const struct nf_conntrack_tuple_hash *h;
311 struct sockaddr_in6 sin6;
312 struct nf_conn *ct;
313 __be32 flow_label;
314 int bound_dev_if;
315
316 lock_sock(sk);
317 tuple.src.u3.in6 = sk->sk_v6_rcv_saddr;
318 tuple.src.u.tcp.port = inet->inet_sport;
319 tuple.dst.u3.in6 = sk->sk_v6_daddr;
320 tuple.dst.u.tcp.port = inet->inet_dport;
321 tuple.dst.protonum = sk->sk_protocol;
322 bound_dev_if = sk->sk_bound_dev_if;
323 flow_label = inet6->flow_label;
324 release_sock(sk);
325
326 if (tuple.dst.protonum != IPPROTO_TCP &&
327 tuple.dst.protonum != IPPROTO_SCTP)
328 return -ENOPROTOOPT;
329
330 if (*len < 0 || (unsigned int)*len < sizeof(sin6))
331 return -EINVAL;
332
333 h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
334 if (!h) {
335 pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
336 &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
337 &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port));
338 return -ENOENT;
339 }
340
341 ct = nf_ct_tuplehash_to_ctrack(h);
342
343 sin6.sin6_family = AF_INET6;
344 sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
345 sin6.sin6_flowinfo = flow_label & IPV6_FLOWINFO_MASK;
346 memcpy(&sin6.sin6_addr,
347 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6,
348 sizeof(sin6.sin6_addr));
349
350 nf_ct_put(ct);
351 sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, bound_dev_if);
352 return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
353 }
354
355 static struct nf_sockopt_ops so_getorigdst6 = {
356 .pf = NFPROTO_IPV6,
357 .get_optmin = IP6T_SO_ORIGINAL_DST,
358 .get_optmax = IP6T_SO_ORIGINAL_DST + 1,
359 .get = ipv6_getorigdst,
360 .owner = THIS_MODULE,
361 };
362
363 static unsigned int ipv6_confirm(void *priv,
364 struct sk_buff *skb,
365 const struct nf_hook_state *state)
366 {
367 struct nf_conn *ct;
368 enum ip_conntrack_info ctinfo;
369 unsigned char pnum = ipv6_hdr(skb)->nexthdr;
370 __be16 frag_off;
371 int protoff;
372
373 ct = nf_ct_get(skb, &ctinfo);
374 if (!ct || ctinfo == IP_CT_RELATED_REPLY)
375 return nf_conntrack_confirm(skb);
376
377 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
378 &frag_off);
379 if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
380 pr_debug("proto header not found\n");
381 return nf_conntrack_confirm(skb);
382 }
383
384 return nf_confirm(skb, protoff, ct, ctinfo);
385 }
386
/* IPv6 PRE_ROUTING hook: hand the packet straight to nf_conntrack_in(); @priv is unused. */
static unsigned int ipv6_conntrack_in(void *priv, struct sk_buff *skb,
				      const struct nf_hook_state *state)
{
	return nf_conntrack_in(skb, state);
}
393
/* IPv6 LOCAL_OUT hook: hand the packet straight to nf_conntrack_in(); @priv is unused. */
static unsigned int ipv6_conntrack_local(void *priv, struct sk_buff *skb,
					 const struct nf_hook_state *state)
{
	return nf_conntrack_in(skb, state);
}
400
401 static const struct nf_hook_ops ipv6_conntrack_ops[] = {
402 {
403 .hook = ipv6_conntrack_in,
404 .pf = NFPROTO_IPV6,
405 .hooknum = NF_INET_PRE_ROUTING,
406 .priority = NF_IP6_PRI_CONNTRACK,
407 },
408 {
409 .hook = ipv6_conntrack_local,
410 .pf = NFPROTO_IPV6,
411 .hooknum = NF_INET_LOCAL_OUT,
412 .priority = NF_IP6_PRI_CONNTRACK,
413 },
414 {
415 .hook = ipv6_confirm,
416 .pf = NFPROTO_IPV6,
417 .hooknum = NF_INET_POST_ROUTING,
418 .priority = NF_IP6_PRI_LAST,
419 },
420 {
421 .hook = ipv6_confirm,
422 .pf = NFPROTO_IPV6,
423 .hooknum = NF_INET_LOCAL_IN,
424 .priority = NF_IP6_PRI_LAST - 1,
425 },
426 };
427 #endif
428
429 static int nf_ct_tcp_fixup(struct nf_conn *ct, void *_nfproto)
430 {
431 u8 nfproto = (unsigned long)_nfproto;
432
433 if (nf_ct_l3num(ct) != nfproto)
434 return 0;
435
436 if (nf_ct_protonum(ct) == IPPROTO_TCP &&
437 ct->proto.tcp.state == TCP_CONNTRACK_ESTABLISHED) {
438 ct->proto.tcp.seen[0].td_maxwin = 0;
439 ct->proto.tcp.seen[1].td_maxwin = 0;
440 }
441
442 return 0;
443 }
444
445 static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
446 {
447 struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
448 bool fixup_needed = false;
449 int err = 0;
450
451 mutex_lock(&nf_ct_proto_mutex);
452
453 switch (nfproto) {
454 case NFPROTO_IPV4:
455 cnet->users4++;
456 if (cnet->users4 > 1)
457 goto out_unlock;
458 err = nf_defrag_ipv4_enable(net);
459 if (err) {
460 cnet->users4 = 0;
461 goto out_unlock;
462 }
463
464 err = nf_register_net_hooks(net, ipv4_conntrack_ops,
465 ARRAY_SIZE(ipv4_conntrack_ops));
466 if (err)
467 cnet->users4 = 0;
468 else
469 fixup_needed = true;
470 break;
471 #if IS_ENABLED(CONFIG_IPV6)
472 case NFPROTO_IPV6:
473 cnet->users6++;
474 if (cnet->users6 > 1)
475 goto out_unlock;
476 err = nf_defrag_ipv6_enable(net);
477 if (err < 0) {
478 cnet->users6 = 0;
479 goto out_unlock;
480 }
481
482 err = nf_register_net_hooks(net, ipv6_conntrack_ops,
483 ARRAY_SIZE(ipv6_conntrack_ops));
484 if (err)
485 cnet->users6 = 0;
486 else
487 fixup_needed = true;
488 break;
489 #endif
490 default:
491 err = -EPROTO;
492 break;
493 }
494 out_unlock:
495 mutex_unlock(&nf_ct_proto_mutex);
496
497 if (fixup_needed)
498 nf_ct_iterate_cleanup_net(net, nf_ct_tcp_fixup,
499 (void *)(unsigned long)nfproto, 0, 0);
500
501 return err;
502 }
503
504 static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
505 {
506 struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
507
508 mutex_lock(&nf_ct_proto_mutex);
509 switch (nfproto) {
510 case NFPROTO_IPV4:
511 if (cnet->users4 && (--cnet->users4 == 0))
512 nf_unregister_net_hooks(net, ipv4_conntrack_ops,
513 ARRAY_SIZE(ipv4_conntrack_ops));
514 break;
515 #if IS_ENABLED(CONFIG_IPV6)
516 case NFPROTO_IPV6:
517 if (cnet->users6 && (--cnet->users6 == 0))
518 nf_unregister_net_hooks(net, ipv6_conntrack_ops,
519 ARRAY_SIZE(ipv6_conntrack_ops));
520 break;
521 #endif
522 }
523
524 mutex_unlock(&nf_ct_proto_mutex);
525 }
526
527 int nf_ct_netns_get(struct net *net, u8 nfproto)
528 {
529 int err;
530
531 if (nfproto == NFPROTO_INET) {
532 err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
533 if (err < 0)
534 goto err1;
535 err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
536 if (err < 0)
537 goto err2;
538 } else {
539 err = nf_ct_netns_do_get(net, nfproto);
540 if (err < 0)
541 goto err1;
542 }
543 return 0;
544
545 err2:
546 nf_ct_netns_put(net, NFPROTO_IPV4);
547 err1:
548 return err;
549 }
550 EXPORT_SYMBOL_GPL(nf_ct_netns_get);
551
552 void nf_ct_netns_put(struct net *net, uint8_t nfproto)
553 {
554 if (nfproto == NFPROTO_INET) {
555 nf_ct_netns_do_put(net, NFPROTO_IPV4);
556 nf_ct_netns_do_put(net, NFPROTO_IPV6);
557 } else {
558 nf_ct_netns_do_put(net, nfproto);
559 }
560 }
561 EXPORT_SYMBOL_GPL(nf_ct_netns_put);
562
563 int nf_conntrack_proto_init(void)
564 {
565 int ret;
566
567 ret = nf_register_sockopt(&so_getorigdst);
568 if (ret < 0)
569 return ret;
570
571 #if IS_ENABLED(CONFIG_IPV6)
572 ret = nf_register_sockopt(&so_getorigdst6);
573 if (ret < 0)
574 goto cleanup_sockopt;
575 #endif
576
577 return ret;
578
579 #if IS_ENABLED(CONFIG_IPV6)
580 cleanup_sockopt:
581 nf_unregister_sockopt(&so_getorigdst6);
582 #endif
583 return ret;
584 }
585
586 void nf_conntrack_proto_fini(void)
587 {
588 nf_unregister_sockopt(&so_getorigdst);
589 #if IS_ENABLED(CONFIG_IPV6)
590 nf_unregister_sockopt(&so_getorigdst6);
591 #endif
592 }
593
594 void nf_conntrack_proto_pernet_init(struct net *net)
595 {
596 nf_conntrack_generic_init_net(net);
597 nf_conntrack_udp_init_net(net);
598 nf_conntrack_tcp_init_net(net);
599 nf_conntrack_icmp_init_net(net);
600 #if IS_ENABLED(CONFIG_IPV6)
601 nf_conntrack_icmpv6_init_net(net);
602 #endif
603 #ifdef CONFIG_NF_CT_PROTO_DCCP
604 nf_conntrack_dccp_init_net(net);
605 #endif
606 #ifdef CONFIG_NF_CT_PROTO_SCTP
607 nf_conntrack_sctp_init_net(net);
608 #endif
609 #ifdef CONFIG_NF_CT_PROTO_GRE
610 nf_conntrack_gre_init_net(net);
611 #endif
612 }
613
/* Per-netns teardown: only the GRE tracker has work to do (keymap flush). */
void nf_conntrack_proto_pernet_fini(struct net *net)
{
#ifdef CONFIG_NF_CT_PROTO_GRE
	nf_ct_gre_keymap_flush(net);
#endif
}
620
621 module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
622 &nf_conntrack_htable_size, 0600);
623
624 MODULE_ALIAS("ip_conntrack");
625 MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
626 MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
627 MODULE_LICENSE("GPL");