1 // SPDX-License-Identifier: GPL-2.0
3 #include <linux/types.h>
4 #include <linux/netfilter.h>
5 #include <linux/module.h>
6 #include <linux/slab.h>
7 #include <linux/mutex.h>
8 #include <linux/vmalloc.h>
9 #include <linux/stddef.h>
10 #include <linux/err.h>
11 #include <linux/percpu.h>
12 #include <linux/notifier.h>
13 #include <linux/kernel.h>
14 #include <linux/netdevice.h>
16 #include <net/netfilter/nf_conntrack.h>
17 #include <net/netfilter/nf_conntrack_l4proto.h>
18 #include <net/netfilter/nf_conntrack_core.h>
19 #include <net/netfilter/nf_log.h>
22 #include <linux/icmp.h>
23 #include <linux/sysctl.h>
24 #include <net/route.h>
27 #include <linux/netfilter_ipv4.h>
28 #include <linux/netfilter_ipv6.h>
29 #include <linux/netfilter_ipv6/ip6_tables.h>
30 #include <net/netfilter/nf_conntrack_helper.h>
31 #include <net/netfilter/nf_conntrack_zones.h>
32 #include <net/netfilter/nf_conntrack_seqadj.h>
33 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
34 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
35 #include <net/netfilter/nf_nat_helper.h>
36 #include <net/netfilter/ipv4/nf_defrag_ipv4.h>
37 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
39 #include <linux/ipv6.h>
40 #include <linux/in6.h>
42 #include <net/inet_frag.h>
44 extern unsigned int nf_conntrack_net_id
;
46 static DEFINE_MUTEX(nf_ct_proto_mutex
);
50 void nf_l4proto_log_invalid(const struct sk_buff
*skb
,
58 if (net
->ct
.sysctl_log_invalid
!= protonum
||
59 net
->ct
.sysctl_log_invalid
!= IPPROTO_RAW
)
66 nf_log_packet(net
, pf
, 0, skb
, NULL
, NULL
, NULL
,
67 "nf_ct_proto_%d: %pV ", protonum
, &vaf
);
70 EXPORT_SYMBOL_GPL(nf_l4proto_log_invalid
);
73 void nf_ct_l4proto_log_invalid(const struct sk_buff
*skb
,
74 const struct nf_conn
*ct
,
82 if (likely(net
->ct
.sysctl_log_invalid
== 0))
89 nf_l4proto_log_invalid(skb
, net
, nf_ct_l3num(ct
),
90 nf_ct_protonum(ct
), "%pV", &vaf
);
93 EXPORT_SYMBOL_GPL(nf_ct_l4proto_log_invalid
);
96 const struct nf_conntrack_l4proto
*nf_ct_l4proto_find(u8 l4proto
)
99 case IPPROTO_UDP
: return &nf_conntrack_l4proto_udp
;
100 case IPPROTO_TCP
: return &nf_conntrack_l4proto_tcp
;
101 case IPPROTO_ICMP
: return &nf_conntrack_l4proto_icmp
;
102 #ifdef CONFIG_NF_CT_PROTO_DCCP
103 case IPPROTO_DCCP
: return &nf_conntrack_l4proto_dccp
;
105 #ifdef CONFIG_NF_CT_PROTO_SCTP
106 case IPPROTO_SCTP
: return &nf_conntrack_l4proto_sctp
;
108 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
109 case IPPROTO_UDPLITE
: return &nf_conntrack_l4proto_udplite
;
111 #ifdef CONFIG_NF_CT_PROTO_GRE
112 case IPPROTO_GRE
: return &nf_conntrack_l4proto_gre
;
114 #if IS_ENABLED(CONFIG_IPV6)
115 case IPPROTO_ICMPV6
: return &nf_conntrack_l4proto_icmpv6
;
116 #endif /* CONFIG_IPV6 */
119 return &nf_conntrack_l4proto_generic
;
121 EXPORT_SYMBOL_GPL(nf_ct_l4proto_find
);
123 static unsigned int nf_confirm(struct sk_buff
*skb
,
124 unsigned int protoff
,
126 enum ip_conntrack_info ctinfo
)
128 const struct nf_conn_help
*help
;
130 help
= nfct_help(ct
);
132 const struct nf_conntrack_helper
*helper
;
135 /* rcu_read_lock()ed by nf_hook_thresh */
136 helper
= rcu_dereference(help
->helper
);
138 ret
= helper
->help(skb
,
141 if (ret
!= NF_ACCEPT
)
146 if (test_bit(IPS_SEQ_ADJUST_BIT
, &ct
->status
) &&
147 !nf_is_loopback_packet(skb
)) {
148 if (!nf_ct_seq_adjust(skb
, ct
, ctinfo
, protoff
)) {
149 NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct
), drop
);
154 /* We've seen it coming out the other side: confirm it */
155 return nf_conntrack_confirm(skb
);
158 static unsigned int ipv4_confirm(void *priv
,
160 const struct nf_hook_state
*state
)
162 enum ip_conntrack_info ctinfo
;
165 ct
= nf_ct_get(skb
, &ctinfo
);
166 if (!ct
|| ctinfo
== IP_CT_RELATED_REPLY
)
167 return nf_conntrack_confirm(skb
);
169 return nf_confirm(skb
,
170 skb_network_offset(skb
) + ip_hdrlen(skb
),
174 static unsigned int ipv4_conntrack_in(void *priv
,
176 const struct nf_hook_state
*state
)
178 return nf_conntrack_in(skb
, state
);
181 static unsigned int ipv4_conntrack_local(void *priv
,
183 const struct nf_hook_state
*state
)
185 if (ip_is_fragment(ip_hdr(skb
))) { /* IP_NODEFRAG setsockopt set */
186 enum ip_conntrack_info ctinfo
;
187 struct nf_conn
*tmpl
;
189 tmpl
= nf_ct_get(skb
, &ctinfo
);
190 if (tmpl
&& nf_ct_is_template(tmpl
)) {
191 /* when skipping ct, clear templates to avoid fooling
192 * later targets/matches
200 return nf_conntrack_in(skb
, state
);
203 /* Connection tracking may drop packets, but never alters them, so
204 * make it the first hook.
206 static const struct nf_hook_ops ipv4_conntrack_ops
[] = {
208 .hook
= ipv4_conntrack_in
,
210 .hooknum
= NF_INET_PRE_ROUTING
,
211 .priority
= NF_IP_PRI_CONNTRACK
,
214 .hook
= ipv4_conntrack_local
,
216 .hooknum
= NF_INET_LOCAL_OUT
,
217 .priority
= NF_IP_PRI_CONNTRACK
,
220 .hook
= ipv4_confirm
,
222 .hooknum
= NF_INET_POST_ROUTING
,
223 .priority
= NF_IP_PRI_CONNTRACK_CONFIRM
,
226 .hook
= ipv4_confirm
,
228 .hooknum
= NF_INET_LOCAL_IN
,
229 .priority
= NF_IP_PRI_CONNTRACK_CONFIRM
,
233 /* Fast function for those who don't want to parse /proc (and I don't
235 * Reversing the socket's dst/src point of view gives us the reply
239 getorigdst(struct sock
*sk
, int optval
, void __user
*user
, int *len
)
241 const struct inet_sock
*inet
= inet_sk(sk
);
242 const struct nf_conntrack_tuple_hash
*h
;
243 struct nf_conntrack_tuple tuple
;
245 memset(&tuple
, 0, sizeof(tuple
));
248 tuple
.src
.u3
.ip
= inet
->inet_rcv_saddr
;
249 tuple
.src
.u
.tcp
.port
= inet
->inet_sport
;
250 tuple
.dst
.u3
.ip
= inet
->inet_daddr
;
251 tuple
.dst
.u
.tcp
.port
= inet
->inet_dport
;
252 tuple
.src
.l3num
= PF_INET
;
253 tuple
.dst
.protonum
= sk
->sk_protocol
;
256 /* We only do TCP and SCTP at the moment: is there a better way? */
257 if (tuple
.dst
.protonum
!= IPPROTO_TCP
&&
258 tuple
.dst
.protonum
!= IPPROTO_SCTP
) {
259 pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
263 if ((unsigned int)*len
< sizeof(struct sockaddr_in
)) {
264 pr_debug("SO_ORIGINAL_DST: len %d not %zu\n",
265 *len
, sizeof(struct sockaddr_in
));
269 h
= nf_conntrack_find_get(sock_net(sk
), &nf_ct_zone_dflt
, &tuple
);
271 struct sockaddr_in sin
;
272 struct nf_conn
*ct
= nf_ct_tuplehash_to_ctrack(h
);
274 sin
.sin_family
= AF_INET
;
275 sin
.sin_port
= ct
->tuplehash
[IP_CT_DIR_ORIGINAL
]
276 .tuple
.dst
.u
.tcp
.port
;
277 sin
.sin_addr
.s_addr
= ct
->tuplehash
[IP_CT_DIR_ORIGINAL
]
279 memset(sin
.sin_zero
, 0, sizeof(sin
.sin_zero
));
281 pr_debug("SO_ORIGINAL_DST: %pI4 %u\n",
282 &sin
.sin_addr
.s_addr
, ntohs(sin
.sin_port
));
284 if (copy_to_user(user
, &sin
, sizeof(sin
)) != 0)
289 pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n",
290 &tuple
.src
.u3
.ip
, ntohs(tuple
.src
.u
.tcp
.port
),
291 &tuple
.dst
.u3
.ip
, ntohs(tuple
.dst
.u
.tcp
.port
));
295 static struct nf_sockopt_ops so_getorigdst
= {
297 .get_optmin
= SO_ORIGINAL_DST
,
298 .get_optmax
= SO_ORIGINAL_DST
+ 1,
300 .owner
= THIS_MODULE
,
303 #if IS_ENABLED(CONFIG_IPV6)
305 ipv6_getorigdst(struct sock
*sk
, int optval
, void __user
*user
, int *len
)
307 struct nf_conntrack_tuple tuple
= { .src
.l3num
= NFPROTO_IPV6
};
308 const struct ipv6_pinfo
*inet6
= inet6_sk(sk
);
309 const struct inet_sock
*inet
= inet_sk(sk
);
310 const struct nf_conntrack_tuple_hash
*h
;
311 struct sockaddr_in6 sin6
;
317 tuple
.src
.u3
.in6
= sk
->sk_v6_rcv_saddr
;
318 tuple
.src
.u
.tcp
.port
= inet
->inet_sport
;
319 tuple
.dst
.u3
.in6
= sk
->sk_v6_daddr
;
320 tuple
.dst
.u
.tcp
.port
= inet
->inet_dport
;
321 tuple
.dst
.protonum
= sk
->sk_protocol
;
322 bound_dev_if
= sk
->sk_bound_dev_if
;
323 flow_label
= inet6
->flow_label
;
326 if (tuple
.dst
.protonum
!= IPPROTO_TCP
&&
327 tuple
.dst
.protonum
!= IPPROTO_SCTP
)
330 if (*len
< 0 || (unsigned int)*len
< sizeof(sin6
))
333 h
= nf_conntrack_find_get(sock_net(sk
), &nf_ct_zone_dflt
, &tuple
);
335 pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
336 &tuple
.src
.u3
.ip6
, ntohs(tuple
.src
.u
.tcp
.port
),
337 &tuple
.dst
.u3
.ip6
, ntohs(tuple
.dst
.u
.tcp
.port
));
341 ct
= nf_ct_tuplehash_to_ctrack(h
);
343 sin6
.sin6_family
= AF_INET6
;
344 sin6
.sin6_port
= ct
->tuplehash
[IP_CT_DIR_ORIGINAL
].tuple
.dst
.u
.tcp
.port
;
345 sin6
.sin6_flowinfo
= flow_label
& IPV6_FLOWINFO_MASK
;
346 memcpy(&sin6
.sin6_addr
,
347 &ct
->tuplehash
[IP_CT_DIR_ORIGINAL
].tuple
.dst
.u3
.in6
,
348 sizeof(sin6
.sin6_addr
));
351 sin6
.sin6_scope_id
= ipv6_iface_scope_id(&sin6
.sin6_addr
, bound_dev_if
);
352 return copy_to_user(user
, &sin6
, sizeof(sin6
)) ? -EFAULT
: 0;
355 static struct nf_sockopt_ops so_getorigdst6
= {
357 .get_optmin
= IP6T_SO_ORIGINAL_DST
,
358 .get_optmax
= IP6T_SO_ORIGINAL_DST
+ 1,
359 .get
= ipv6_getorigdst
,
360 .owner
= THIS_MODULE
,
363 static unsigned int ipv6_confirm(void *priv
,
365 const struct nf_hook_state
*state
)
368 enum ip_conntrack_info ctinfo
;
369 unsigned char pnum
= ipv6_hdr(skb
)->nexthdr
;
373 ct
= nf_ct_get(skb
, &ctinfo
);
374 if (!ct
|| ctinfo
== IP_CT_RELATED_REPLY
)
375 return nf_conntrack_confirm(skb
);
377 protoff
= ipv6_skip_exthdr(skb
, sizeof(struct ipv6hdr
), &pnum
,
379 if (protoff
< 0 || (frag_off
& htons(~0x7)) != 0) {
380 pr_debug("proto header not found\n");
381 return nf_conntrack_confirm(skb
);
384 return nf_confirm(skb
, protoff
, ct
, ctinfo
);
387 static unsigned int ipv6_conntrack_in(void *priv
,
389 const struct nf_hook_state
*state
)
391 return nf_conntrack_in(skb
, state
);
394 static unsigned int ipv6_conntrack_local(void *priv
,
396 const struct nf_hook_state
*state
)
398 return nf_conntrack_in(skb
, state
);
401 static const struct nf_hook_ops ipv6_conntrack_ops
[] = {
403 .hook
= ipv6_conntrack_in
,
405 .hooknum
= NF_INET_PRE_ROUTING
,
406 .priority
= NF_IP6_PRI_CONNTRACK
,
409 .hook
= ipv6_conntrack_local
,
411 .hooknum
= NF_INET_LOCAL_OUT
,
412 .priority
= NF_IP6_PRI_CONNTRACK
,
415 .hook
= ipv6_confirm
,
417 .hooknum
= NF_INET_POST_ROUTING
,
418 .priority
= NF_IP6_PRI_LAST
,
421 .hook
= ipv6_confirm
,
423 .hooknum
= NF_INET_LOCAL_IN
,
424 .priority
= NF_IP6_PRI_LAST
- 1,
429 static int nf_ct_tcp_fixup(struct nf_conn
*ct
, void *_nfproto
)
431 u8 nfproto
= (unsigned long)_nfproto
;
433 if (nf_ct_l3num(ct
) != nfproto
)
436 if (nf_ct_protonum(ct
) == IPPROTO_TCP
&&
437 ct
->proto
.tcp
.state
== TCP_CONNTRACK_ESTABLISHED
) {
438 ct
->proto
.tcp
.seen
[0].td_maxwin
= 0;
439 ct
->proto
.tcp
.seen
[1].td_maxwin
= 0;
445 static int nf_ct_netns_do_get(struct net
*net
, u8 nfproto
)
447 struct nf_conntrack_net
*cnet
= net_generic(net
, nf_conntrack_net_id
);
448 bool fixup_needed
= false;
451 mutex_lock(&nf_ct_proto_mutex
);
456 if (cnet
->users4
> 1)
458 err
= nf_defrag_ipv4_enable(net
);
464 err
= nf_register_net_hooks(net
, ipv4_conntrack_ops
,
465 ARRAY_SIZE(ipv4_conntrack_ops
));
471 #if IS_ENABLED(CONFIG_IPV6)
474 if (cnet
->users6
> 1)
476 err
= nf_defrag_ipv6_enable(net
);
482 err
= nf_register_net_hooks(net
, ipv6_conntrack_ops
,
483 ARRAY_SIZE(ipv6_conntrack_ops
));
495 mutex_unlock(&nf_ct_proto_mutex
);
498 nf_ct_iterate_cleanup_net(net
, nf_ct_tcp_fixup
,
499 (void *)(unsigned long)nfproto
, 0, 0);
504 static void nf_ct_netns_do_put(struct net
*net
, u8 nfproto
)
506 struct nf_conntrack_net
*cnet
= net_generic(net
, nf_conntrack_net_id
);
508 mutex_lock(&nf_ct_proto_mutex
);
511 if (cnet
->users4
&& (--cnet
->users4
== 0))
512 nf_unregister_net_hooks(net
, ipv4_conntrack_ops
,
513 ARRAY_SIZE(ipv4_conntrack_ops
));
515 #if IS_ENABLED(CONFIG_IPV6)
517 if (cnet
->users6
&& (--cnet
->users6
== 0))
518 nf_unregister_net_hooks(net
, ipv6_conntrack_ops
,
519 ARRAY_SIZE(ipv6_conntrack_ops
));
524 mutex_unlock(&nf_ct_proto_mutex
);
527 int nf_ct_netns_get(struct net
*net
, u8 nfproto
)
531 if (nfproto
== NFPROTO_INET
) {
532 err
= nf_ct_netns_do_get(net
, NFPROTO_IPV4
);
535 err
= nf_ct_netns_do_get(net
, NFPROTO_IPV6
);
539 err
= nf_ct_netns_do_get(net
, nfproto
);
546 nf_ct_netns_put(net
, NFPROTO_IPV4
);
550 EXPORT_SYMBOL_GPL(nf_ct_netns_get
);
552 void nf_ct_netns_put(struct net
*net
, uint8_t nfproto
)
554 if (nfproto
== NFPROTO_INET
) {
555 nf_ct_netns_do_put(net
, NFPROTO_IPV4
);
556 nf_ct_netns_do_put(net
, NFPROTO_IPV6
);
558 nf_ct_netns_do_put(net
, nfproto
);
561 EXPORT_SYMBOL_GPL(nf_ct_netns_put
);
563 int nf_conntrack_proto_init(void)
567 ret
= nf_register_sockopt(&so_getorigdst
);
571 #if IS_ENABLED(CONFIG_IPV6)
572 ret
= nf_register_sockopt(&so_getorigdst6
);
574 goto cleanup_sockopt
;
579 #if IS_ENABLED(CONFIG_IPV6)
581 nf_unregister_sockopt(&so_getorigdst6
);
586 void nf_conntrack_proto_fini(void)
588 nf_unregister_sockopt(&so_getorigdst
);
589 #if IS_ENABLED(CONFIG_IPV6)
590 nf_unregister_sockopt(&so_getorigdst6
);
594 void nf_conntrack_proto_pernet_init(struct net
*net
)
596 nf_conntrack_generic_init_net(net
);
597 nf_conntrack_udp_init_net(net
);
598 nf_conntrack_tcp_init_net(net
);
599 nf_conntrack_icmp_init_net(net
);
600 #if IS_ENABLED(CONFIG_IPV6)
601 nf_conntrack_icmpv6_init_net(net
);
603 #ifdef CONFIG_NF_CT_PROTO_DCCP
604 nf_conntrack_dccp_init_net(net
);
606 #ifdef CONFIG_NF_CT_PROTO_SCTP
607 nf_conntrack_sctp_init_net(net
);
609 #ifdef CONFIG_NF_CT_PROTO_GRE
610 nf_conntrack_gre_init_net(net
);
/*
 * Per-namespace teardown for the layer 4 trackers.  Only GRE has work to do
 * here (flushing its keymap for @net); without CONFIG_NF_CT_PROTO_GRE the
 * function is empty.
 */
void nf_conntrack_proto_pernet_fini(struct net *net)
{
#ifdef CONFIG_NF_CT_PROTO_GRE
	nf_ct_gre_keymap_flush(net);
#endif
}
621 module_param_call(hashsize
, nf_conntrack_set_hashsize
, param_get_uint
,
622 &nf_conntrack_htable_size
, 0600);
624 MODULE_ALIAS("ip_conntrack");
625 MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET
));
626 MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6
));
627 MODULE_LICENSE("GPL");