1 /* netfilter.c: look after the filters for various protocols.
2 * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
4 * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
9 #include <linux/kernel.h>
10 #include <linux/netfilter.h>
11 #include <net/protocol.h>
12 #include <linux/init.h>
13 #include <linux/skbuff.h>
14 #include <linux/wait.h>
15 #include <linux/module.h>
16 #include <linux/interrupt.h>
18 #include <linux/netdevice.h>
19 #include <linux/netfilter_ipv6.h>
20 #include <linux/inetdevice.h>
21 #include <linux/proc_fs.h>
22 #include <linux/mutex.h>
24 #include <linux/rcupdate.h>
25 #include <net/net_namespace.h>
28 #include "nf_internals.h"
30 const struct nf_ipv6_ops __rcu
*nf_ipv6_ops __read_mostly
;
31 EXPORT_SYMBOL_GPL(nf_ipv6_ops
);
33 DEFINE_PER_CPU(bool, nf_skb_duplicated
);
34 EXPORT_SYMBOL_GPL(nf_skb_duplicated
);
#ifdef CONFIG_JUMP_LABEL
/* One static key per (family, hook number); bumped on hook registration
 * so callers can cheaply test whether any hook is installed.
 */
struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
EXPORT_SYMBOL(nf_hooks_needed);
#endif

/* Serializes all mutations of the per-hook entry blobs. */
static DEFINE_MUTEX(nf_hook_mutex);

/* max hooks per family/hooknum */
#define MAX_HOOK_COUNT		1024

/* Dereference a hook blob; legal only while nf_hook_mutex is held. */
#define nf_entry_dereference(e) \
	rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
49 static struct nf_hook_entries
*allocate_hook_entries_size(u16 num
)
51 struct nf_hook_entries
*e
;
52 size_t alloc
= sizeof(*e
) +
53 sizeof(struct nf_hook_entry
) * num
+
54 sizeof(struct nf_hook_ops
*) * num
+
55 sizeof(struct nf_hook_entries_rcu_head
);
60 e
= kvzalloc(alloc
, GFP_KERNEL
);
62 e
->num_hook_entries
= num
;
66 static void __nf_hook_entries_free(struct rcu_head
*h
)
68 struct nf_hook_entries_rcu_head
*head
;
70 head
= container_of(h
, struct nf_hook_entries_rcu_head
, head
);
71 kvfree(head
->allocation
);
74 static void nf_hook_entries_free(struct nf_hook_entries
*e
)
76 struct nf_hook_entries_rcu_head
*head
;
77 struct nf_hook_ops
**ops
;
83 num
= e
->num_hook_entries
;
84 ops
= nf_hook_entries_get_hook_ops(e
);
85 head
= (void *)&ops
[num
];
87 call_rcu(&head
->head
, __nf_hook_entries_free
);
90 static unsigned int accept_all(void *priv
,
92 const struct nf_hook_state
*state
)
94 return NF_ACCEPT
; /* ACCEPT makes nf_hook_slow call next hook */
97 static const struct nf_hook_ops dummy_ops
= {
102 static struct nf_hook_entries
*
103 nf_hook_entries_grow(const struct nf_hook_entries
*old
,
104 const struct nf_hook_ops
*reg
)
106 unsigned int i
, alloc_entries
, nhooks
, old_entries
;
107 struct nf_hook_ops
**orig_ops
= NULL
;
108 struct nf_hook_ops
**new_ops
;
109 struct nf_hook_entries
*new;
110 bool inserted
= false;
113 old_entries
= old
? old
->num_hook_entries
: 0;
116 orig_ops
= nf_hook_entries_get_hook_ops(old
);
118 for (i
= 0; i
< old_entries
; i
++) {
119 if (orig_ops
[i
] != &dummy_ops
)
124 if (alloc_entries
> MAX_HOOK_COUNT
)
125 return ERR_PTR(-E2BIG
);
127 new = allocate_hook_entries_size(alloc_entries
);
129 return ERR_PTR(-ENOMEM
);
131 new_ops
= nf_hook_entries_get_hook_ops(new);
135 while (i
< old_entries
) {
136 if (orig_ops
[i
] == &dummy_ops
) {
141 if (inserted
|| reg
->priority
> orig_ops
[i
]->priority
) {
142 new_ops
[nhooks
] = (void *)orig_ops
[i
];
143 new->hooks
[nhooks
] = old
->hooks
[i
];
146 new_ops
[nhooks
] = (void *)reg
;
147 new->hooks
[nhooks
].hook
= reg
->hook
;
148 new->hooks
[nhooks
].priv
= reg
->priv
;
155 new_ops
[nhooks
] = (void *)reg
;
156 new->hooks
[nhooks
].hook
= reg
->hook
;
157 new->hooks
[nhooks
].priv
= reg
->priv
;
/* Debug build only: warn if the live entries in @hooks are not in
 * non-decreasing priority order; dummy placeholders are ignored.
 */
static void hooks_validate(const struct nf_hook_entries *hooks)
{
#ifdef CONFIG_DEBUG_KERNEL
	struct nf_hook_ops **orig_ops;
	int prio = INT_MIN;
	size_t i;

	orig_ops = nf_hook_entries_get_hook_ops(hooks);

	for (i = 0; i < hooks->num_hook_entries; i++) {
		if (orig_ops[i] == &dummy_ops)
			continue;

		WARN_ON(orig_ops[i]->priority < prio);

		if (orig_ops[i]->priority > prio)
			prio = orig_ops[i]->priority;
	}
#endif
}
184 int nf_hook_entries_insert_raw(struct nf_hook_entries __rcu
**pp
,
185 const struct nf_hook_ops
*reg
)
187 struct nf_hook_entries
*new_hooks
;
188 struct nf_hook_entries
*p
;
190 p
= rcu_dereference_raw(*pp
);
191 new_hooks
= nf_hook_entries_grow(p
, reg
);
192 if (IS_ERR(new_hooks
))
193 return PTR_ERR(new_hooks
);
195 hooks_validate(new_hooks
);
197 rcu_assign_pointer(*pp
, new_hooks
);
199 BUG_ON(p
== new_hooks
);
200 nf_hook_entries_free(p
);
203 EXPORT_SYMBOL_GPL(nf_hook_entries_insert_raw
);
206 * __nf_hook_entries_try_shrink - try to shrink hook array
208 * @old -- current hook blob at @pp
209 * @pp -- location of hook blob
211 * Hook unregistration must always succeed, so to-be-removed hooks
212 * are replaced by a dummy one that will just move to next hook.
214 * This counts the current dummy hooks, attempts to allocate new blob,
215 * copies the live hooks, then replaces and discards old one.
219 * Returns address to free, or NULL.
221 static void *__nf_hook_entries_try_shrink(struct nf_hook_entries
*old
,
222 struct nf_hook_entries __rcu
**pp
)
224 unsigned int i
, j
, skip
= 0, hook_entries
;
225 struct nf_hook_entries
*new = NULL
;
226 struct nf_hook_ops
**orig_ops
;
227 struct nf_hook_ops
**new_ops
;
229 if (WARN_ON_ONCE(!old
))
232 orig_ops
= nf_hook_entries_get_hook_ops(old
);
233 for (i
= 0; i
< old
->num_hook_entries
; i
++) {
234 if (orig_ops
[i
] == &dummy_ops
)
238 /* if skip == hook_entries all hooks have been removed */
239 hook_entries
= old
->num_hook_entries
;
240 if (skip
== hook_entries
)
246 hook_entries
-= skip
;
247 new = allocate_hook_entries_size(hook_entries
);
251 new_ops
= nf_hook_entries_get_hook_ops(new);
252 for (i
= 0, j
= 0; i
< old
->num_hook_entries
; i
++) {
253 if (orig_ops
[i
] == &dummy_ops
)
255 new->hooks
[j
] = old
->hooks
[i
];
256 new_ops
[j
] = (void *)orig_ops
[i
];
261 rcu_assign_pointer(*pp
, new);
265 static struct nf_hook_entries __rcu
**
266 nf_hook_entry_head(struct net
*net
, int pf
, unsigned int hooknum
,
267 struct net_device
*dev
)
272 #ifdef CONFIG_NETFILTER_FAMILY_ARP
274 if (WARN_ON_ONCE(ARRAY_SIZE(net
->nf
.hooks_arp
) <= hooknum
))
276 return net
->nf
.hooks_arp
+ hooknum
;
278 #ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
280 if (WARN_ON_ONCE(ARRAY_SIZE(net
->nf
.hooks_bridge
) <= hooknum
))
282 return net
->nf
.hooks_bridge
+ hooknum
;
285 if (WARN_ON_ONCE(ARRAY_SIZE(net
->nf
.hooks_ipv4
) <= hooknum
))
287 return net
->nf
.hooks_ipv4
+ hooknum
;
289 if (WARN_ON_ONCE(ARRAY_SIZE(net
->nf
.hooks_ipv6
) <= hooknum
))
291 return net
->nf
.hooks_ipv6
+ hooknum
;
292 #if IS_ENABLED(CONFIG_DECNET)
294 if (WARN_ON_ONCE(ARRAY_SIZE(net
->nf
.hooks_decnet
) <= hooknum
))
296 return net
->nf
.hooks_decnet
+ hooknum
;
303 #ifdef CONFIG_NETFILTER_INGRESS
304 if (hooknum
== NF_NETDEV_INGRESS
) {
305 if (dev
&& dev_net(dev
) == net
)
306 return &dev
->nf_hooks_ingress
;
313 static int __nf_register_net_hook(struct net
*net
, int pf
,
314 const struct nf_hook_ops
*reg
)
316 struct nf_hook_entries
*p
, *new_hooks
;
317 struct nf_hook_entries __rcu
**pp
;
319 if (pf
== NFPROTO_NETDEV
) {
320 #ifndef CONFIG_NETFILTER_INGRESS
321 if (reg
->hooknum
== NF_NETDEV_INGRESS
)
324 if (reg
->hooknum
!= NF_NETDEV_INGRESS
||
325 !reg
->dev
|| dev_net(reg
->dev
) != net
)
329 pp
= nf_hook_entry_head(net
, pf
, reg
->hooknum
, reg
->dev
);
333 mutex_lock(&nf_hook_mutex
);
335 p
= nf_entry_dereference(*pp
);
336 new_hooks
= nf_hook_entries_grow(p
, reg
);
338 if (!IS_ERR(new_hooks
))
339 rcu_assign_pointer(*pp
, new_hooks
);
341 mutex_unlock(&nf_hook_mutex
);
342 if (IS_ERR(new_hooks
))
343 return PTR_ERR(new_hooks
);
345 hooks_validate(new_hooks
);
346 #ifdef CONFIG_NETFILTER_INGRESS
347 if (pf
== NFPROTO_NETDEV
&& reg
->hooknum
== NF_NETDEV_INGRESS
)
348 net_inc_ingress_queue();
350 #ifdef CONFIG_JUMP_LABEL
351 static_key_slow_inc(&nf_hooks_needed
[pf
][reg
->hooknum
]);
353 BUG_ON(p
== new_hooks
);
354 nf_hook_entries_free(p
);
359 * nf_remove_net_hook - remove a hook from blob
361 * @oldp: current address of hook blob
362 * @unreg: hook to unregister
364 * This cannot fail, hook unregistration must always succeed.
365 * Therefore replace the to-be-removed hook with a dummy hook.
367 static bool nf_remove_net_hook(struct nf_hook_entries
*old
,
368 const struct nf_hook_ops
*unreg
)
370 struct nf_hook_ops
**orig_ops
;
373 orig_ops
= nf_hook_entries_get_hook_ops(old
);
374 for (i
= 0; i
< old
->num_hook_entries
; i
++) {
375 if (orig_ops
[i
] != unreg
)
377 WRITE_ONCE(old
->hooks
[i
].hook
, accept_all
);
378 WRITE_ONCE(orig_ops
[i
], &dummy_ops
);
385 static void __nf_unregister_net_hook(struct net
*net
, int pf
,
386 const struct nf_hook_ops
*reg
)
388 struct nf_hook_entries __rcu
**pp
;
389 struct nf_hook_entries
*p
;
391 pp
= nf_hook_entry_head(net
, pf
, reg
->hooknum
, reg
->dev
);
395 mutex_lock(&nf_hook_mutex
);
397 p
= nf_entry_dereference(*pp
);
398 if (WARN_ON_ONCE(!p
)) {
399 mutex_unlock(&nf_hook_mutex
);
403 if (nf_remove_net_hook(p
, reg
)) {
404 #ifdef CONFIG_NETFILTER_INGRESS
405 if (pf
== NFPROTO_NETDEV
&& reg
->hooknum
== NF_NETDEV_INGRESS
)
406 net_dec_ingress_queue();
408 #ifdef CONFIG_JUMP_LABEL
409 static_key_slow_dec(&nf_hooks_needed
[pf
][reg
->hooknum
]);
412 WARN_ONCE(1, "hook not found, pf %d num %d", pf
, reg
->hooknum
);
415 p
= __nf_hook_entries_try_shrink(p
, pp
);
416 mutex_unlock(&nf_hook_mutex
);
420 nf_queue_nf_hook_drop(net
);
421 nf_hook_entries_free(p
);
424 void nf_unregister_net_hook(struct net
*net
, const struct nf_hook_ops
*reg
)
426 if (reg
->pf
== NFPROTO_INET
) {
427 __nf_unregister_net_hook(net
, NFPROTO_IPV4
, reg
);
428 __nf_unregister_net_hook(net
, NFPROTO_IPV6
, reg
);
430 __nf_unregister_net_hook(net
, reg
->pf
, reg
);
433 EXPORT_SYMBOL(nf_unregister_net_hook
);
435 void nf_hook_entries_delete_raw(struct nf_hook_entries __rcu
**pp
,
436 const struct nf_hook_ops
*reg
)
438 struct nf_hook_entries
*p
;
440 p
= rcu_dereference_raw(*pp
);
441 if (nf_remove_net_hook(p
, reg
)) {
442 p
= __nf_hook_entries_try_shrink(p
, pp
);
443 nf_hook_entries_free(p
);
446 EXPORT_SYMBOL_GPL(nf_hook_entries_delete_raw
);
448 int nf_register_net_hook(struct net
*net
, const struct nf_hook_ops
*reg
)
452 if (reg
->pf
== NFPROTO_INET
) {
453 err
= __nf_register_net_hook(net
, NFPROTO_IPV4
, reg
);
457 err
= __nf_register_net_hook(net
, NFPROTO_IPV6
, reg
);
459 __nf_unregister_net_hook(net
, NFPROTO_IPV4
, reg
);
463 err
= __nf_register_net_hook(net
, reg
->pf
, reg
);
470 EXPORT_SYMBOL(nf_register_net_hook
);
472 int nf_register_net_hooks(struct net
*net
, const struct nf_hook_ops
*reg
,
478 for (i
= 0; i
< n
; i
++) {
479 err
= nf_register_net_hook(net
, ®
[i
]);
487 nf_unregister_net_hooks(net
, reg
, i
);
490 EXPORT_SYMBOL(nf_register_net_hooks
);
492 void nf_unregister_net_hooks(struct net
*net
, const struct nf_hook_ops
*reg
,
493 unsigned int hookcount
)
497 for (i
= 0; i
< hookcount
; i
++)
498 nf_unregister_net_hook(net
, ®
[i
]);
500 EXPORT_SYMBOL(nf_unregister_net_hooks
);
502 /* Returns 1 if okfn() needs to be executed by the caller,
503 * -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */
504 int nf_hook_slow(struct sk_buff
*skb
, struct nf_hook_state
*state
,
505 const struct nf_hook_entries
*e
, unsigned int s
)
507 unsigned int verdict
;
510 for (; s
< e
->num_hook_entries
; s
++) {
511 verdict
= nf_hook_entry_hookfn(&e
->hooks
[s
], skb
, state
);
512 switch (verdict
& NF_VERDICT_MASK
) {
517 ret
= NF_DROP_GETERR(verdict
);
522 ret
= nf_queue(skb
, state
, e
, s
, verdict
);
527 /* Implicit handling for NF_STOLEN, as well as any other
528 * non conventional verdicts.
536 EXPORT_SYMBOL(nf_hook_slow
);
539 int skb_make_writable(struct sk_buff
*skb
, unsigned int writable_len
)
541 if (writable_len
> skb
->len
)
544 /* Not exclusive use of packet? Must copy. */
545 if (!skb_cloned(skb
)) {
546 if (writable_len
<= skb_headlen(skb
))
548 } else if (skb_clone_writable(skb
, writable_len
))
551 if (writable_len
<= skb_headlen(skb
))
554 writable_len
-= skb_headlen(skb
);
556 return !!__pskb_pull_tail(skb
, writable_len
);
558 EXPORT_SYMBOL(skb_make_writable
);
560 /* This needs to be compiled in any case to avoid dependencies between the
561 * nfnetlink_queue code and nf_conntrack.
563 struct nfnl_ct_hook __rcu
*nfnl_ct_hook __read_mostly
;
564 EXPORT_SYMBOL_GPL(nfnl_ct_hook
);
566 struct nf_ct_hook __rcu
*nf_ct_hook __read_mostly
;
567 EXPORT_SYMBOL_GPL(nf_ct_hook
);
569 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
570 /* This does not belong here, but locally generated errors need it if connection
571 tracking in use: without this, connection may not be in hash table, and hence
572 manufactured ICMP or RST packets will not be associated with it. */
573 void (*ip_ct_attach
)(struct sk_buff
*, const struct sk_buff
*)
575 EXPORT_SYMBOL(ip_ct_attach
);
577 struct nf_nat_hook __rcu
*nf_nat_hook __read_mostly
;
578 EXPORT_SYMBOL_GPL(nf_nat_hook
);
580 void nf_ct_attach(struct sk_buff
*new, const struct sk_buff
*skb
)
582 void (*attach
)(struct sk_buff
*, const struct sk_buff
*);
586 attach
= rcu_dereference(ip_ct_attach
);
592 EXPORT_SYMBOL(nf_ct_attach
);
594 void nf_conntrack_destroy(struct nf_conntrack
*nfct
)
596 struct nf_ct_hook
*ct_hook
;
599 ct_hook
= rcu_dereference(nf_ct_hook
);
600 BUG_ON(ct_hook
== NULL
);
601 ct_hook
->destroy(nfct
);
604 EXPORT_SYMBOL(nf_conntrack_destroy
);
606 bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple
*dst_tuple
,
607 const struct sk_buff
*skb
)
609 struct nf_ct_hook
*ct_hook
;
613 ct_hook
= rcu_dereference(nf_ct_hook
);
615 ret
= ct_hook
->get_tuple_skb(dst_tuple
, skb
);
619 EXPORT_SYMBOL(nf_ct_get_tuple_skb
);
621 /* Built-in default zone used e.g. by modules. */
622 const struct nf_conntrack_zone nf_ct_zone_dflt
= {
623 .id
= NF_CT_DEFAULT_ZONE_ID
,
624 .dir
= NF_CT_DEFAULT_ZONE_DIR
,
626 EXPORT_SYMBOL_GPL(nf_ct_zone_dflt
);
627 #endif /* CONFIG_NF_CONNTRACK */
629 static void __net_init
630 __netfilter_net_init(struct nf_hook_entries __rcu
**e
, int max
)
634 for (h
= 0; h
< max
; h
++)
635 RCU_INIT_POINTER(e
[h
], NULL
);
638 static int __net_init
netfilter_net_init(struct net
*net
)
640 __netfilter_net_init(net
->nf
.hooks_ipv4
, ARRAY_SIZE(net
->nf
.hooks_ipv4
));
641 __netfilter_net_init(net
->nf
.hooks_ipv6
, ARRAY_SIZE(net
->nf
.hooks_ipv6
));
642 #ifdef CONFIG_NETFILTER_FAMILY_ARP
643 __netfilter_net_init(net
->nf
.hooks_arp
, ARRAY_SIZE(net
->nf
.hooks_arp
));
645 #ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
646 __netfilter_net_init(net
->nf
.hooks_bridge
, ARRAY_SIZE(net
->nf
.hooks_bridge
));
648 #if IS_ENABLED(CONFIG_DECNET)
649 __netfilter_net_init(net
->nf
.hooks_decnet
, ARRAY_SIZE(net
->nf
.hooks_decnet
));
652 #ifdef CONFIG_PROC_FS
653 net
->nf
.proc_netfilter
= proc_net_mkdir(net
, "netfilter",
655 if (!net
->nf
.proc_netfilter
) {
656 if (!net_eq(net
, &init_net
))
657 pr_err("cannot create netfilter proc entry");
666 static void __net_exit
netfilter_net_exit(struct net
*net
)
668 remove_proc_entry("netfilter", net
->proc_net
);
671 static struct pernet_operations netfilter_net_ops
= {
672 .init
= netfilter_net_init
,
673 .exit
= netfilter_net_exit
,
676 int __init
netfilter_init(void)
680 ret
= register_pernet_subsys(&netfilter_net_ops
);
684 ret
= netfilter_log_init();
690 unregister_pernet_subsys(&netfilter_net_ops
);