// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/cls_api.c	Packet classifier API.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 */
12 #include <linux/module.h>
13 #include <linux/types.h>
14 #include <linux/kernel.h>
15 #include <linux/string.h>
16 #include <linux/errno.h>
17 #include <linux/err.h>
18 #include <linux/skbuff.h>
19 #include <linux/init.h>
20 #include <linux/kmod.h>
21 #include <linux/slab.h>
22 #include <linux/idr.h>
23 #include <linux/rhashtable.h>
24 #include <linux/jhash.h>
25 #include <linux/rculist.h>
26 #include <net/net_namespace.h>
28 #include <net/netlink.h>
29 #include <net/pkt_sched.h>
30 #include <net/pkt_cls.h>
31 #include <net/tc_act/tc_pedit.h>
32 #include <net/tc_act/tc_mirred.h>
33 #include <net/tc_act/tc_vlan.h>
34 #include <net/tc_act/tc_tunnel_key.h>
35 #include <net/tc_act/tc_csum.h>
36 #include <net/tc_act/tc_gact.h>
37 #include <net/tc_act/tc_police.h>
38 #include <net/tc_act/tc_sample.h>
39 #include <net/tc_act/tc_skbedit.h>
40 #include <net/tc_act/tc_ct.h>
41 #include <net/tc_act/tc_mpls.h>
42 #include <net/flow_offload.h>
44 extern const struct nla_policy rtm_tca_policy
[TCA_MAX
+ 1];
46 /* The list of all installed classifier types */
47 static LIST_HEAD(tcf_proto_base
);
49 /* Protects list of registered TC modules. It is pure SMP lock. */
50 static DEFINE_RWLOCK(cls_mod_lock
);
52 static u32
destroy_obj_hashfn(const struct tcf_proto
*tp
)
54 return jhash_3words(tp
->chain
->index
, tp
->prio
,
55 (__force __u32
)tp
->protocol
, 0);
58 static void tcf_proto_signal_destroying(struct tcf_chain
*chain
,
61 struct tcf_block
*block
= chain
->block
;
63 mutex_lock(&block
->proto_destroy_lock
);
64 hash_add_rcu(block
->proto_destroy_ht
, &tp
->destroy_ht_node
,
65 destroy_obj_hashfn(tp
));
66 mutex_unlock(&block
->proto_destroy_lock
);
69 static bool tcf_proto_cmp(const struct tcf_proto
*tp1
,
70 const struct tcf_proto
*tp2
)
72 return tp1
->chain
->index
== tp2
->chain
->index
&&
73 tp1
->prio
== tp2
->prio
&&
74 tp1
->protocol
== tp2
->protocol
;
77 static bool tcf_proto_exists_destroying(struct tcf_chain
*chain
,
80 u32 hash
= destroy_obj_hashfn(tp
);
81 struct tcf_proto
*iter
;
85 hash_for_each_possible_rcu(chain
->block
->proto_destroy_ht
, iter
,
86 destroy_ht_node
, hash
) {
87 if (tcf_proto_cmp(tp
, iter
)) {
98 tcf_proto_signal_destroyed(struct tcf_chain
*chain
, struct tcf_proto
*tp
)
100 struct tcf_block
*block
= chain
->block
;
102 mutex_lock(&block
->proto_destroy_lock
);
103 if (hash_hashed(&tp
->destroy_ht_node
))
104 hash_del_rcu(&tp
->destroy_ht_node
);
105 mutex_unlock(&block
->proto_destroy_lock
);
108 /* Find classifier type by string name */
110 static const struct tcf_proto_ops
*__tcf_proto_lookup_ops(const char *kind
)
112 const struct tcf_proto_ops
*t
, *res
= NULL
;
115 read_lock(&cls_mod_lock
);
116 list_for_each_entry(t
, &tcf_proto_base
, head
) {
117 if (strcmp(kind
, t
->kind
) == 0) {
118 if (try_module_get(t
->owner
))
123 read_unlock(&cls_mod_lock
);
128 static const struct tcf_proto_ops
*
129 tcf_proto_lookup_ops(const char *kind
, bool rtnl_held
,
130 struct netlink_ext_ack
*extack
)
132 const struct tcf_proto_ops
*ops
;
134 ops
= __tcf_proto_lookup_ops(kind
);
137 #ifdef CONFIG_MODULES
140 request_module("cls_%s", kind
);
143 ops
= __tcf_proto_lookup_ops(kind
);
144 /* We dropped the RTNL semaphore in order to perform
145 * the module load. So, even if we succeeded in loading
146 * the module we have to replay the request. We indicate
147 * this using -EAGAIN.
150 module_put(ops
->owner
);
151 return ERR_PTR(-EAGAIN
);
154 NL_SET_ERR_MSG(extack
, "TC classifier not found");
155 return ERR_PTR(-ENOENT
);
158 /* Register(unregister) new classifier type */
160 int register_tcf_proto_ops(struct tcf_proto_ops
*ops
)
162 struct tcf_proto_ops
*t
;
165 write_lock(&cls_mod_lock
);
166 list_for_each_entry(t
, &tcf_proto_base
, head
)
167 if (!strcmp(ops
->kind
, t
->kind
))
170 list_add_tail(&ops
->head
, &tcf_proto_base
);
173 write_unlock(&cls_mod_lock
);
176 EXPORT_SYMBOL(register_tcf_proto_ops
);
/* Workqueue on which tcf_queue_work() queues its rcu_work items. */
static struct workqueue_struct *tc_filter_wq;
180 int unregister_tcf_proto_ops(struct tcf_proto_ops
*ops
)
182 struct tcf_proto_ops
*t
;
185 /* Wait for outstanding call_rcu()s, if any, from a
186 * tcf_proto_ops's destroy() handler.
189 flush_workqueue(tc_filter_wq
);
191 write_lock(&cls_mod_lock
);
192 list_for_each_entry(t
, &tcf_proto_base
, head
) {
199 write_unlock(&cls_mod_lock
);
202 EXPORT_SYMBOL(unregister_tcf_proto_ops
);
204 bool tcf_queue_work(struct rcu_work
*rwork
, work_func_t func
)
206 INIT_RCU_WORK(rwork
, func
);
207 return queue_rcu_work(tc_filter_wq
, rwork
);
209 EXPORT_SYMBOL(tcf_queue_work
);
211 /* Select new prio value from the range, managed by kernel. */
213 static inline u32
tcf_auto_prio(struct tcf_proto
*tp
)
215 u32 first
= TC_H_MAKE(0xC0000000U
, 0U);
218 first
= tp
->prio
- 1;
220 return TC_H_MAJ(first
);
223 static bool tcf_proto_check_kind(struct nlattr
*kind
, char *name
)
226 return nla_strlcpy(name
, kind
, IFNAMSIZ
) >= IFNAMSIZ
;
227 memset(name
, 0, IFNAMSIZ
);
231 static bool tcf_proto_is_unlocked(const char *kind
)
233 const struct tcf_proto_ops
*ops
;
236 if (strlen(kind
) == 0)
239 ops
= tcf_proto_lookup_ops(kind
, false, NULL
);
240 /* On error return false to take rtnl lock. Proto lookup/create
241 * functions will perform lookup again and properly handle errors.
246 ret
= !!(ops
->flags
& TCF_PROTO_OPS_DOIT_UNLOCKED
);
247 module_put(ops
->owner
);
251 static struct tcf_proto
*tcf_proto_create(const char *kind
, u32 protocol
,
252 u32 prio
, struct tcf_chain
*chain
,
254 struct netlink_ext_ack
*extack
)
256 struct tcf_proto
*tp
;
259 tp
= kzalloc(sizeof(*tp
), GFP_KERNEL
);
261 return ERR_PTR(-ENOBUFS
);
263 tp
->ops
= tcf_proto_lookup_ops(kind
, rtnl_held
, extack
);
264 if (IS_ERR(tp
->ops
)) {
265 err
= PTR_ERR(tp
->ops
);
268 tp
->classify
= tp
->ops
->classify
;
269 tp
->protocol
= protocol
;
272 spin_lock_init(&tp
->lock
);
273 refcount_set(&tp
->refcnt
, 1);
275 err
= tp
->ops
->init(tp
);
277 module_put(tp
->ops
->owner
);
287 static void tcf_proto_get(struct tcf_proto
*tp
)
289 refcount_inc(&tp
->refcnt
);
292 static void tcf_chain_put(struct tcf_chain
*chain
);
294 static void tcf_proto_destroy(struct tcf_proto
*tp
, bool rtnl_held
,
295 bool sig_destroy
, struct netlink_ext_ack
*extack
)
297 tp
->ops
->destroy(tp
, rtnl_held
, extack
);
299 tcf_proto_signal_destroyed(tp
->chain
, tp
);
300 tcf_chain_put(tp
->chain
);
301 module_put(tp
->ops
->owner
);
305 static void tcf_proto_put(struct tcf_proto
*tp
, bool rtnl_held
,
306 struct netlink_ext_ack
*extack
)
308 if (refcount_dec_and_test(&tp
->refcnt
))
309 tcf_proto_destroy(tp
, rtnl_held
, true, extack
);
312 static bool tcf_proto_check_delete(struct tcf_proto
*tp
)
314 if (tp
->ops
->delete_empty
)
315 return tp
->ops
->delete_empty(tp
);
321 static void tcf_proto_mark_delete(struct tcf_proto
*tp
)
323 spin_lock(&tp
->lock
);
325 spin_unlock(&tp
->lock
);
328 static bool tcf_proto_is_deleting(struct tcf_proto
*tp
)
332 spin_lock(&tp
->lock
);
333 deleting
= tp
->deleting
;
334 spin_unlock(&tp
->lock
);
/* Lockdep assertion that the caller holds (block)->lock. */
#define ASSERT_BLOCK_LOCKED(block)					\
	lockdep_assert_held(&(block)->lock)
342 struct tcf_filter_chain_list_item
{
343 struct list_head list
;
344 tcf_chain_head_change_t
*chain_head_change
;
345 void *chain_head_change_priv
;
348 static struct tcf_chain
*tcf_chain_create(struct tcf_block
*block
,
351 struct tcf_chain
*chain
;
353 ASSERT_BLOCK_LOCKED(block
);
355 chain
= kzalloc(sizeof(*chain
), GFP_KERNEL
);
358 list_add_tail_rcu(&chain
->list
, &block
->chain_list
);
359 mutex_init(&chain
->filter_chain_lock
);
360 chain
->block
= block
;
361 chain
->index
= chain_index
;
364 block
->chain0
.chain
= chain
;
368 static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item
*item
,
369 struct tcf_proto
*tp_head
)
371 if (item
->chain_head_change
)
372 item
->chain_head_change(tp_head
, item
->chain_head_change_priv
);
375 static void tcf_chain0_head_change(struct tcf_chain
*chain
,
376 struct tcf_proto
*tp_head
)
378 struct tcf_filter_chain_list_item
*item
;
379 struct tcf_block
*block
= chain
->block
;
384 mutex_lock(&block
->lock
);
385 list_for_each_entry(item
, &block
->chain0
.filter_chain_list
, list
)
386 tcf_chain_head_change_item(item
, tp_head
);
387 mutex_unlock(&block
->lock
);
390 /* Returns true if block can be safely freed. */
392 static bool tcf_chain_detach(struct tcf_chain
*chain
)
394 struct tcf_block
*block
= chain
->block
;
396 ASSERT_BLOCK_LOCKED(block
);
398 list_del_rcu(&chain
->list
);
400 block
->chain0
.chain
= NULL
;
402 if (list_empty(&block
->chain_list
) &&
403 refcount_read(&block
->refcnt
) == 0)
409 static void tcf_block_destroy(struct tcf_block
*block
)
411 mutex_destroy(&block
->lock
);
412 mutex_destroy(&block
->proto_destroy_lock
);
413 kfree_rcu(block
, rcu
);
416 static void tcf_chain_destroy(struct tcf_chain
*chain
, bool free_block
)
418 struct tcf_block
*block
= chain
->block
;
420 mutex_destroy(&chain
->filter_chain_lock
);
421 kfree_rcu(chain
, rcu
);
423 tcf_block_destroy(block
);
426 static void tcf_chain_hold(struct tcf_chain
*chain
)
428 ASSERT_BLOCK_LOCKED(chain
->block
);
433 static bool tcf_chain_held_by_acts_only(struct tcf_chain
*chain
)
435 ASSERT_BLOCK_LOCKED(chain
->block
);
437 /* In case all the references are action references, this
438 * chain should not be shown to the user.
440 return chain
->refcnt
== chain
->action_refcnt
;
443 static struct tcf_chain
*tcf_chain_lookup(struct tcf_block
*block
,
446 struct tcf_chain
*chain
;
448 ASSERT_BLOCK_LOCKED(block
);
450 list_for_each_entry(chain
, &block
->chain_list
, list
) {
451 if (chain
->index
== chain_index
)
457 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
458 static struct tcf_chain
*tcf_chain_lookup_rcu(const struct tcf_block
*block
,
461 struct tcf_chain
*chain
;
463 list_for_each_entry_rcu(chain
, &block
->chain_list
, list
) {
464 if (chain
->index
== chain_index
)
471 static int tc_chain_notify(struct tcf_chain
*chain
, struct sk_buff
*oskb
,
472 u32 seq
, u16 flags
, int event
, bool unicast
);
474 static struct tcf_chain
*__tcf_chain_get(struct tcf_block
*block
,
475 u32 chain_index
, bool create
,
478 struct tcf_chain
*chain
= NULL
;
479 bool is_first_reference
;
481 mutex_lock(&block
->lock
);
482 chain
= tcf_chain_lookup(block
, chain_index
);
484 tcf_chain_hold(chain
);
488 chain
= tcf_chain_create(block
, chain_index
);
494 ++chain
->action_refcnt
;
495 is_first_reference
= chain
->refcnt
- chain
->action_refcnt
== 1;
496 mutex_unlock(&block
->lock
);
498 /* Send notification only in case we got the first
499 * non-action reference. Until then, the chain acts only as
500 * a placeholder for actions pointing to it and user ought
501 * not know about them.
503 if (is_first_reference
&& !by_act
)
504 tc_chain_notify(chain
, NULL
, 0, NLM_F_CREATE
| NLM_F_EXCL
,
505 RTM_NEWCHAIN
, false);
510 mutex_unlock(&block
->lock
);
514 static struct tcf_chain
*tcf_chain_get(struct tcf_block
*block
, u32 chain_index
,
517 return __tcf_chain_get(block
, chain_index
, create
, false);
520 struct tcf_chain
*tcf_chain_get_by_act(struct tcf_block
*block
, u32 chain_index
)
522 return __tcf_chain_get(block
, chain_index
, true, true);
524 EXPORT_SYMBOL(tcf_chain_get_by_act
);
526 static void tc_chain_tmplt_del(const struct tcf_proto_ops
*tmplt_ops
,
528 static int tc_chain_notify_delete(const struct tcf_proto_ops
*tmplt_ops
,
529 void *tmplt_priv
, u32 chain_index
,
530 struct tcf_block
*block
, struct sk_buff
*oskb
,
531 u32 seq
, u16 flags
, bool unicast
);
533 static void __tcf_chain_put(struct tcf_chain
*chain
, bool by_act
,
534 bool explicitly_created
)
536 struct tcf_block
*block
= chain
->block
;
537 const struct tcf_proto_ops
*tmplt_ops
;
538 bool free_block
= false;
542 mutex_lock(&block
->lock
);
543 if (explicitly_created
) {
544 if (!chain
->explicitly_created
) {
545 mutex_unlock(&block
->lock
);
548 chain
->explicitly_created
= false;
552 chain
->action_refcnt
--;
554 /* tc_chain_notify_delete can't be called while holding block lock.
555 * However, when block is unlocked chain can be changed concurrently, so
556 * save these to temporary variables.
558 refcnt
= --chain
->refcnt
;
559 tmplt_ops
= chain
->tmplt_ops
;
560 tmplt_priv
= chain
->tmplt_priv
;
562 /* The last dropped non-action reference will trigger notification. */
563 if (refcnt
- chain
->action_refcnt
== 0 && !by_act
) {
564 tc_chain_notify_delete(tmplt_ops
, tmplt_priv
, chain
->index
,
565 block
, NULL
, 0, 0, false);
566 /* Last reference to chain, no need to lock. */
567 chain
->flushing
= false;
571 free_block
= tcf_chain_detach(chain
);
572 mutex_unlock(&block
->lock
);
575 tc_chain_tmplt_del(tmplt_ops
, tmplt_priv
);
576 tcf_chain_destroy(chain
, free_block
);
580 static void tcf_chain_put(struct tcf_chain
*chain
)
582 __tcf_chain_put(chain
, false, false);
585 void tcf_chain_put_by_act(struct tcf_chain
*chain
)
587 __tcf_chain_put(chain
, true, false);
589 EXPORT_SYMBOL(tcf_chain_put_by_act
);
591 static void tcf_chain_put_explicitly_created(struct tcf_chain
*chain
)
593 __tcf_chain_put(chain
, false, true);
596 static void tcf_chain_flush(struct tcf_chain
*chain
, bool rtnl_held
)
598 struct tcf_proto
*tp
, *tp_next
;
600 mutex_lock(&chain
->filter_chain_lock
);
601 tp
= tcf_chain_dereference(chain
->filter_chain
, chain
);
603 tp_next
= rcu_dereference_protected(tp
->next
, 1);
604 tcf_proto_signal_destroying(chain
, tp
);
607 tp
= tcf_chain_dereference(chain
->filter_chain
, chain
);
608 RCU_INIT_POINTER(chain
->filter_chain
, NULL
);
609 tcf_chain0_head_change(chain
, NULL
);
610 chain
->flushing
= true;
611 mutex_unlock(&chain
->filter_chain_lock
);
614 tp_next
= rcu_dereference_protected(tp
->next
, 1);
615 tcf_proto_put(tp
, rtnl_held
, NULL
);
620 static int tcf_block_setup(struct tcf_block
*block
,
621 struct flow_block_offload
*bo
);
623 static void tc_indr_block_cmd(struct net_device
*dev
, struct tcf_block
*block
,
624 flow_indr_block_bind_cb_t
*cb
, void *cb_priv
,
625 enum flow_block_command command
, bool ingress
)
627 struct flow_block_offload bo
= {
629 .binder_type
= ingress
?
630 FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS
:
631 FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS
,
633 .block_shared
= tcf_block_non_null_shared(block
),
635 INIT_LIST_HEAD(&bo
.cb_list
);
640 bo
.block
= &block
->flow_block
;
642 down_write(&block
->cb_lock
);
643 cb(dev
, cb_priv
, TC_SETUP_BLOCK
, &bo
);
645 tcf_block_setup(block
, &bo
);
646 up_write(&block
->cb_lock
);
649 static struct tcf_block
*tc_dev_block(struct net_device
*dev
, bool ingress
)
651 const struct Qdisc_class_ops
*cops
;
652 const struct Qdisc_ops
*ops
;
655 if (!dev_ingress_queue(dev
))
658 qdisc
= dev_ingress_queue(dev
)->qdisc_sleeping
;
666 if (!ingress
&& !strcmp("ingress", ops
->id
))
673 if (!cops
->tcf_block
)
676 return cops
->tcf_block(qdisc
,
677 ingress
? TC_H_MIN_INGRESS
: TC_H_MIN_EGRESS
,
681 static void tc_indr_block_get_and_cmd(struct net_device
*dev
,
682 flow_indr_block_bind_cb_t
*cb
,
684 enum flow_block_command command
)
686 struct tcf_block
*block
;
688 block
= tc_dev_block(dev
, true);
689 tc_indr_block_cmd(dev
, block
, cb
, cb_priv
, command
, true);
691 block
= tc_dev_block(dev
, false);
692 tc_indr_block_cmd(dev
, block
, cb
, cb_priv
, command
, false);
695 static void tc_indr_block_call(struct tcf_block
*block
,
696 struct net_device
*dev
,
697 struct tcf_block_ext_info
*ei
,
698 enum flow_block_command command
,
699 struct netlink_ext_ack
*extack
)
701 struct flow_block_offload bo
= {
703 .binder_type
= ei
->binder_type
,
705 .block
= &block
->flow_block
,
706 .block_shared
= tcf_block_shared(block
),
709 INIT_LIST_HEAD(&bo
.cb_list
);
711 flow_indr_block_call(dev
, &bo
, command
, TC_SETUP_BLOCK
);
712 tcf_block_setup(block
, &bo
);
715 static bool tcf_block_offload_in_use(struct tcf_block
*block
)
717 return atomic_read(&block
->offloadcnt
);
720 static int tcf_block_offload_cmd(struct tcf_block
*block
,
721 struct net_device
*dev
,
722 struct tcf_block_ext_info
*ei
,
723 enum flow_block_command command
,
724 struct netlink_ext_ack
*extack
)
726 struct flow_block_offload bo
= {};
729 bo
.net
= dev_net(dev
);
730 bo
.command
= command
;
731 bo
.binder_type
= ei
->binder_type
;
732 bo
.block
= &block
->flow_block
;
733 bo
.block_shared
= tcf_block_shared(block
);
735 INIT_LIST_HEAD(&bo
.cb_list
);
737 err
= dev
->netdev_ops
->ndo_setup_tc(dev
, TC_SETUP_BLOCK
, &bo
);
741 return tcf_block_setup(block
, &bo
);
744 static int tcf_block_offload_bind(struct tcf_block
*block
, struct Qdisc
*q
,
745 struct tcf_block_ext_info
*ei
,
746 struct netlink_ext_ack
*extack
)
748 struct net_device
*dev
= q
->dev_queue
->dev
;
751 down_write(&block
->cb_lock
);
752 if (!dev
->netdev_ops
->ndo_setup_tc
)
753 goto no_offload_dev_inc
;
755 /* If tc offload feature is disabled and the block we try to bind
756 * to already has some offloaded filters, forbid to bind.
758 if (!tc_can_offload(dev
) && tcf_block_offload_in_use(block
)) {
759 NL_SET_ERR_MSG(extack
, "Bind to offloaded block failed as dev has offload disabled");
764 err
= tcf_block_offload_cmd(block
, dev
, ei
, FLOW_BLOCK_BIND
, extack
);
765 if (err
== -EOPNOTSUPP
)
766 goto no_offload_dev_inc
;
770 tc_indr_block_call(block
, dev
, ei
, FLOW_BLOCK_BIND
, extack
);
771 up_write(&block
->cb_lock
);
775 if (tcf_block_offload_in_use(block
)) {
780 block
->nooffloaddevcnt
++;
781 tc_indr_block_call(block
, dev
, ei
, FLOW_BLOCK_BIND
, extack
);
783 up_write(&block
->cb_lock
);
787 static void tcf_block_offload_unbind(struct tcf_block
*block
, struct Qdisc
*q
,
788 struct tcf_block_ext_info
*ei
)
790 struct net_device
*dev
= q
->dev_queue
->dev
;
793 down_write(&block
->cb_lock
);
794 tc_indr_block_call(block
, dev
, ei
, FLOW_BLOCK_UNBIND
, NULL
);
796 if (!dev
->netdev_ops
->ndo_setup_tc
)
797 goto no_offload_dev_dec
;
798 err
= tcf_block_offload_cmd(block
, dev
, ei
, FLOW_BLOCK_UNBIND
, NULL
);
799 if (err
== -EOPNOTSUPP
)
800 goto no_offload_dev_dec
;
801 up_write(&block
->cb_lock
);
805 WARN_ON(block
->nooffloaddevcnt
-- == 0);
806 up_write(&block
->cb_lock
);
810 tcf_chain0_head_change_cb_add(struct tcf_block
*block
,
811 struct tcf_block_ext_info
*ei
,
812 struct netlink_ext_ack
*extack
)
814 struct tcf_filter_chain_list_item
*item
;
815 struct tcf_chain
*chain0
;
817 item
= kmalloc(sizeof(*item
), GFP_KERNEL
);
819 NL_SET_ERR_MSG(extack
, "Memory allocation for head change callback item failed");
822 item
->chain_head_change
= ei
->chain_head_change
;
823 item
->chain_head_change_priv
= ei
->chain_head_change_priv
;
825 mutex_lock(&block
->lock
);
826 chain0
= block
->chain0
.chain
;
828 tcf_chain_hold(chain0
);
830 list_add(&item
->list
, &block
->chain0
.filter_chain_list
);
831 mutex_unlock(&block
->lock
);
834 struct tcf_proto
*tp_head
;
836 mutex_lock(&chain0
->filter_chain_lock
);
838 tp_head
= tcf_chain_dereference(chain0
->filter_chain
, chain0
);
840 tcf_chain_head_change_item(item
, tp_head
);
842 mutex_lock(&block
->lock
);
843 list_add(&item
->list
, &block
->chain0
.filter_chain_list
);
844 mutex_unlock(&block
->lock
);
846 mutex_unlock(&chain0
->filter_chain_lock
);
847 tcf_chain_put(chain0
);
854 tcf_chain0_head_change_cb_del(struct tcf_block
*block
,
855 struct tcf_block_ext_info
*ei
)
857 struct tcf_filter_chain_list_item
*item
;
859 mutex_lock(&block
->lock
);
860 list_for_each_entry(item
, &block
->chain0
.filter_chain_list
, list
) {
861 if ((!ei
->chain_head_change
&& !ei
->chain_head_change_priv
) ||
862 (item
->chain_head_change
== ei
->chain_head_change
&&
863 item
->chain_head_change_priv
== ei
->chain_head_change_priv
)) {
864 if (block
->chain0
.chain
)
865 tcf_chain_head_change_item(item
, NULL
);
866 list_del(&item
->list
);
867 mutex_unlock(&block
->lock
);
873 mutex_unlock(&block
->lock
);
878 spinlock_t idr_lock
; /* Protects idr */
882 static unsigned int tcf_net_id
;
884 static int tcf_block_insert(struct tcf_block
*block
, struct net
*net
,
885 struct netlink_ext_ack
*extack
)
887 struct tcf_net
*tn
= net_generic(net
, tcf_net_id
);
890 idr_preload(GFP_KERNEL
);
891 spin_lock(&tn
->idr_lock
);
892 err
= idr_alloc_u32(&tn
->idr
, block
, &block
->index
, block
->index
,
894 spin_unlock(&tn
->idr_lock
);
900 static void tcf_block_remove(struct tcf_block
*block
, struct net
*net
)
902 struct tcf_net
*tn
= net_generic(net
, tcf_net_id
);
904 spin_lock(&tn
->idr_lock
);
905 idr_remove(&tn
->idr
, block
->index
);
906 spin_unlock(&tn
->idr_lock
);
909 static struct tcf_block
*tcf_block_create(struct net
*net
, struct Qdisc
*q
,
911 struct netlink_ext_ack
*extack
)
913 struct tcf_block
*block
;
915 block
= kzalloc(sizeof(*block
), GFP_KERNEL
);
917 NL_SET_ERR_MSG(extack
, "Memory allocation for block failed");
918 return ERR_PTR(-ENOMEM
);
920 mutex_init(&block
->lock
);
921 mutex_init(&block
->proto_destroy_lock
);
922 init_rwsem(&block
->cb_lock
);
923 flow_block_init(&block
->flow_block
);
924 INIT_LIST_HEAD(&block
->chain_list
);
925 INIT_LIST_HEAD(&block
->owner_list
);
926 INIT_LIST_HEAD(&block
->chain0
.filter_chain_list
);
928 refcount_set(&block
->refcnt
, 1);
930 block
->index
= block_index
;
932 /* Don't store q pointer for blocks which are shared */
933 if (!tcf_block_shared(block
))
938 static struct tcf_block
*tcf_block_lookup(struct net
*net
, u32 block_index
)
940 struct tcf_net
*tn
= net_generic(net
, tcf_net_id
);
942 return idr_find(&tn
->idr
, block_index
);
945 static struct tcf_block
*tcf_block_refcnt_get(struct net
*net
, u32 block_index
)
947 struct tcf_block
*block
;
950 block
= tcf_block_lookup(net
, block_index
);
951 if (block
&& !refcount_inc_not_zero(&block
->refcnt
))
958 static struct tcf_chain
*
959 __tcf_get_next_chain(struct tcf_block
*block
, struct tcf_chain
*chain
)
961 mutex_lock(&block
->lock
);
963 chain
= list_is_last(&chain
->list
, &block
->chain_list
) ?
964 NULL
: list_next_entry(chain
, list
);
966 chain
= list_first_entry_or_null(&block
->chain_list
,
967 struct tcf_chain
, list
);
969 /* skip all action-only chains */
970 while (chain
&& tcf_chain_held_by_acts_only(chain
))
971 chain
= list_is_last(&chain
->list
, &block
->chain_list
) ?
972 NULL
: list_next_entry(chain
, list
);
975 tcf_chain_hold(chain
);
976 mutex_unlock(&block
->lock
);
981 /* Function to be used by all clients that want to iterate over all chains on
982 * block. It properly obtains block->lock and takes reference to chain before
983 * returning it. Users of this function must be tolerant to concurrent chain
984 * insertion/deletion or ensure that no concurrent chain modification is
985 * possible. Note that all netlink dump callbacks cannot guarantee to provide
986 * consistent dump because rtnl lock is released each time skb is filled with
987 * data and sent to user-space.
991 tcf_get_next_chain(struct tcf_block
*block
, struct tcf_chain
*chain
)
993 struct tcf_chain
*chain_next
= __tcf_get_next_chain(block
, chain
);
996 tcf_chain_put(chain
);
1000 EXPORT_SYMBOL(tcf_get_next_chain
);
1002 static struct tcf_proto
*
1003 __tcf_get_next_proto(struct tcf_chain
*chain
, struct tcf_proto
*tp
)
1008 mutex_lock(&chain
->filter_chain_lock
);
1011 tp
= tcf_chain_dereference(chain
->filter_chain
, chain
);
1012 } else if (tcf_proto_is_deleting(tp
)) {
1013 /* 'deleting' flag is set and chain->filter_chain_lock was
1014 * unlocked, which means next pointer could be invalid. Restart
1017 prio
= tp
->prio
+ 1;
1018 tp
= tcf_chain_dereference(chain
->filter_chain
, chain
);
1020 for (; tp
; tp
= tcf_chain_dereference(tp
->next
, chain
))
1021 if (!tp
->deleting
&& tp
->prio
>= prio
)
1024 tp
= tcf_chain_dereference(tp
->next
, chain
);
1030 mutex_unlock(&chain
->filter_chain_lock
);
1035 /* Function to be used by all clients that want to iterate over all tp's on
1036 * chain. Users of this function must be tolerant to concurrent tp
1037 * insertion/deletion or ensure that no concurrent chain modification is
1038 * possible. Note that all netlink dump callbacks cannot guarantee to provide
1039 * consistent dump because rtnl lock is released each time skb is filled with
1040 * data and sent to user-space.
1044 tcf_get_next_proto(struct tcf_chain
*chain
, struct tcf_proto
*tp
,
1047 struct tcf_proto
*tp_next
= __tcf_get_next_proto(chain
, tp
);
1050 tcf_proto_put(tp
, rtnl_held
, NULL
);
1054 EXPORT_SYMBOL(tcf_get_next_proto
);
1056 static void tcf_block_flush_all_chains(struct tcf_block
*block
, bool rtnl_held
)
1058 struct tcf_chain
*chain
;
1060 /* Last reference to block. At this point chains cannot be added or
1061 * removed concurrently.
1063 for (chain
= tcf_get_next_chain(block
, NULL
);
1065 chain
= tcf_get_next_chain(block
, chain
)) {
1066 tcf_chain_put_explicitly_created(chain
);
1067 tcf_chain_flush(chain
, rtnl_held
);
1071 /* Lookup Qdisc and increments its reference counter.
1072 * Set parent, if necessary.
1075 static int __tcf_qdisc_find(struct net
*net
, struct Qdisc
**q
,
1076 u32
*parent
, int ifindex
, bool rtnl_held
,
1077 struct netlink_ext_ack
*extack
)
1079 const struct Qdisc_class_ops
*cops
;
1080 struct net_device
*dev
;
1083 if (ifindex
== TCM_IFINDEX_MAGIC_BLOCK
)
1089 dev
= dev_get_by_index_rcu(net
, ifindex
);
1098 *parent
= (*q
)->handle
;
1100 *q
= qdisc_lookup_rcu(dev
, TC_H_MAJ(*parent
));
1102 NL_SET_ERR_MSG(extack
, "Parent Qdisc doesn't exists");
1108 *q
= qdisc_refcount_inc_nz(*q
);
1110 NL_SET_ERR_MSG(extack
, "Parent Qdisc doesn't exists");
1115 /* Is it classful? */
1116 cops
= (*q
)->ops
->cl_ops
;
1118 NL_SET_ERR_MSG(extack
, "Qdisc not classful");
1123 if (!cops
->tcf_block
) {
1124 NL_SET_ERR_MSG(extack
, "Class doesn't support blocks");
1130 /* At this point we know that qdisc is not noop_qdisc,
1131 * which means that qdisc holds a reference to net_device
1132 * and we hold a reference to qdisc, so it is safe to release
1144 qdisc_put_unlocked(*q
);
1150 static int __tcf_qdisc_cl_find(struct Qdisc
*q
, u32 parent
, unsigned long *cl
,
1151 int ifindex
, struct netlink_ext_ack
*extack
)
1153 if (ifindex
== TCM_IFINDEX_MAGIC_BLOCK
)
1156 /* Do we search for filter, attached to class? */
1157 if (TC_H_MIN(parent
)) {
1158 const struct Qdisc_class_ops
*cops
= q
->ops
->cl_ops
;
1160 *cl
= cops
->find(q
, parent
);
1162 NL_SET_ERR_MSG(extack
, "Specified class doesn't exist");
1170 static struct tcf_block
*__tcf_block_find(struct net
*net
, struct Qdisc
*q
,
1171 unsigned long cl
, int ifindex
,
1173 struct netlink_ext_ack
*extack
)
1175 struct tcf_block
*block
;
1177 if (ifindex
== TCM_IFINDEX_MAGIC_BLOCK
) {
1178 block
= tcf_block_refcnt_get(net
, block_index
);
1180 NL_SET_ERR_MSG(extack
, "Block of given index was not found");
1181 return ERR_PTR(-EINVAL
);
1184 const struct Qdisc_class_ops
*cops
= q
->ops
->cl_ops
;
1186 block
= cops
->tcf_block(q
, cl
, extack
);
1188 return ERR_PTR(-EINVAL
);
1190 if (tcf_block_shared(block
)) {
1191 NL_SET_ERR_MSG(extack
, "This filter block is shared. Please use the block index to manipulate the filters");
1192 return ERR_PTR(-EOPNOTSUPP
);
1195 /* Always take reference to block in order to support execution
1196 * of rules update path of cls API without rtnl lock. Caller
1197 * must release block when it is finished using it. 'if' block
1198 * of this conditional obtain reference to block by calling
1199 * tcf_block_refcnt_get().
1201 refcount_inc(&block
->refcnt
);
1207 static void __tcf_block_put(struct tcf_block
*block
, struct Qdisc
*q
,
1208 struct tcf_block_ext_info
*ei
, bool rtnl_held
)
1210 if (refcount_dec_and_mutex_lock(&block
->refcnt
, &block
->lock
)) {
1211 /* Flushing/putting all chains will cause the block to be
1212 * deallocated when last chain is freed. However, if chain_list
1213 * is empty, block has to be manually deallocated. After block
1214 * reference counter reached 0, it is no longer possible to
1215 * increment it or add new chains to block.
1217 bool free_block
= list_empty(&block
->chain_list
);
1219 mutex_unlock(&block
->lock
);
1220 if (tcf_block_shared(block
))
1221 tcf_block_remove(block
, block
->net
);
1224 tcf_block_offload_unbind(block
, q
, ei
);
1227 tcf_block_destroy(block
);
1229 tcf_block_flush_all_chains(block
, rtnl_held
);
1231 tcf_block_offload_unbind(block
, q
, ei
);
1235 static void tcf_block_refcnt_put(struct tcf_block
*block
, bool rtnl_held
)
1237 __tcf_block_put(block
, NULL
, NULL
, rtnl_held
);
1241 * Set q, parent, cl when appropriate.
1244 static struct tcf_block
*tcf_block_find(struct net
*net
, struct Qdisc
**q
,
1245 u32
*parent
, unsigned long *cl
,
1246 int ifindex
, u32 block_index
,
1247 struct netlink_ext_ack
*extack
)
1249 struct tcf_block
*block
;
1254 err
= __tcf_qdisc_find(net
, q
, parent
, ifindex
, true, extack
);
1258 err
= __tcf_qdisc_cl_find(*q
, *parent
, cl
, ifindex
, extack
);
1262 block
= __tcf_block_find(net
, *q
, *cl
, ifindex
, block_index
, extack
);
1263 if (IS_ERR(block
)) {
1264 err
= PTR_ERR(block
);
1275 return ERR_PTR(err
);
1278 static void tcf_block_release(struct Qdisc
*q
, struct tcf_block
*block
,
1281 if (!IS_ERR_OR_NULL(block
))
1282 tcf_block_refcnt_put(block
, rtnl_held
);
1288 qdisc_put_unlocked(q
);
1292 struct tcf_block_owner_item
{
1293 struct list_head list
;
1295 enum flow_block_binder_type binder_type
;
1299 tcf_block_owner_netif_keep_dst(struct tcf_block
*block
,
1301 enum flow_block_binder_type binder_type
)
1303 if (block
->keep_dst
&&
1304 binder_type
!= FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS
&&
1305 binder_type
!= FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS
)
1306 netif_keep_dst(qdisc_dev(q
));
1309 void tcf_block_netif_keep_dst(struct tcf_block
*block
)
1311 struct tcf_block_owner_item
*item
;
1313 block
->keep_dst
= true;
1314 list_for_each_entry(item
, &block
->owner_list
, list
)
1315 tcf_block_owner_netif_keep_dst(block
, item
->q
,
1318 EXPORT_SYMBOL(tcf_block_netif_keep_dst
);
1320 static int tcf_block_owner_add(struct tcf_block
*block
,
1322 enum flow_block_binder_type binder_type
)
1324 struct tcf_block_owner_item
*item
;
1326 item
= kmalloc(sizeof(*item
), GFP_KERNEL
);
1330 item
->binder_type
= binder_type
;
1331 list_add(&item
->list
, &block
->owner_list
);
1335 static void tcf_block_owner_del(struct tcf_block
*block
,
1337 enum flow_block_binder_type binder_type
)
1339 struct tcf_block_owner_item
*item
;
1341 list_for_each_entry(item
, &block
->owner_list
, list
) {
1342 if (item
->q
== q
&& item
->binder_type
== binder_type
) {
1343 list_del(&item
->list
);
1351 int tcf_block_get_ext(struct tcf_block
**p_block
, struct Qdisc
*q
,
1352 struct tcf_block_ext_info
*ei
,
1353 struct netlink_ext_ack
*extack
)
1355 struct net
*net
= qdisc_net(q
);
1356 struct tcf_block
*block
= NULL
;
1359 if (ei
->block_index
)
1360 /* block_index not 0 means the shared block is requested */
1361 block
= tcf_block_refcnt_get(net
, ei
->block_index
);
1364 block
= tcf_block_create(net
, q
, ei
->block_index
, extack
);
1366 return PTR_ERR(block
);
1367 if (tcf_block_shared(block
)) {
1368 err
= tcf_block_insert(block
, net
, extack
);
1370 goto err_block_insert
;
1374 err
= tcf_block_owner_add(block
, q
, ei
->binder_type
);
1376 goto err_block_owner_add
;
1378 tcf_block_owner_netif_keep_dst(block
, q
, ei
->binder_type
);
1380 err
= tcf_chain0_head_change_cb_add(block
, ei
, extack
);
1382 goto err_chain0_head_change_cb_add
;
1384 err
= tcf_block_offload_bind(block
, q
, ei
, extack
);
1386 goto err_block_offload_bind
;
1391 err_block_offload_bind
:
1392 tcf_chain0_head_change_cb_del(block
, ei
);
1393 err_chain0_head_change_cb_add
:
1394 tcf_block_owner_del(block
, q
, ei
->binder_type
);
1395 err_block_owner_add
:
1397 tcf_block_refcnt_put(block
, true);
1400 EXPORT_SYMBOL(tcf_block_get_ext
);
1402 static void tcf_chain_head_change_dflt(struct tcf_proto
*tp_head
, void *priv
)
1404 struct tcf_proto __rcu
**p_filter_chain
= priv
;
1406 rcu_assign_pointer(*p_filter_chain
, tp_head
);
1409 int tcf_block_get(struct tcf_block
**p_block
,
1410 struct tcf_proto __rcu
**p_filter_chain
, struct Qdisc
*q
,
1411 struct netlink_ext_ack
*extack
)
1413 struct tcf_block_ext_info ei
= {
1414 .chain_head_change
= tcf_chain_head_change_dflt
,
1415 .chain_head_change_priv
= p_filter_chain
,
1418 WARN_ON(!p_filter_chain
);
1419 return tcf_block_get_ext(p_block
, q
, &ei
, extack
);
1421 EXPORT_SYMBOL(tcf_block_get
);
1423 /* XXX: Standalone actions are not allowed to jump to any chain, and bound
1424 * actions should be all removed after flushing.
1426 void tcf_block_put_ext(struct tcf_block
*block
, struct Qdisc
*q
,
1427 struct tcf_block_ext_info
*ei
)
1431 tcf_chain0_head_change_cb_del(block
, ei
);
1432 tcf_block_owner_del(block
, q
, ei
->binder_type
);
1434 __tcf_block_put(block
, q
, ei
, true);
1436 EXPORT_SYMBOL(tcf_block_put_ext
);
1438 void tcf_block_put(struct tcf_block
*block
)
1440 struct tcf_block_ext_info ei
= {0, };
1444 tcf_block_put_ext(block
, block
->q
, &ei
);
1447 EXPORT_SYMBOL(tcf_block_put
);
1450 tcf_block_playback_offloads(struct tcf_block
*block
, flow_setup_cb_t
*cb
,
1451 void *cb_priv
, bool add
, bool offload_in_use
,
1452 struct netlink_ext_ack
*extack
)
1454 struct tcf_chain
*chain
, *chain_prev
;
1455 struct tcf_proto
*tp
, *tp_prev
;
1458 lockdep_assert_held(&block
->cb_lock
);
1460 for (chain
= __tcf_get_next_chain(block
, NULL
);
1463 chain
= __tcf_get_next_chain(block
, chain
),
1464 tcf_chain_put(chain_prev
)) {
1465 for (tp
= __tcf_get_next_proto(chain
, NULL
); tp
;
1467 tp
= __tcf_get_next_proto(chain
, tp
),
1468 tcf_proto_put(tp_prev
, true, NULL
)) {
1469 if (tp
->ops
->reoffload
) {
1470 err
= tp
->ops
->reoffload(tp
, add
, cb
, cb_priv
,
1473 goto err_playback_remove
;
1474 } else if (add
&& offload_in_use
) {
1476 NL_SET_ERR_MSG(extack
, "Filter HW offload failed - classifier without re-offloading support");
1477 goto err_playback_remove
;
1484 err_playback_remove
:
1485 tcf_proto_put(tp
, true, NULL
);
1486 tcf_chain_put(chain
);
1487 tcf_block_playback_offloads(block
, cb
, cb_priv
, false, offload_in_use
,
1492 static int tcf_block_bind(struct tcf_block
*block
,
1493 struct flow_block_offload
*bo
)
1495 struct flow_block_cb
*block_cb
, *next
;
1498 lockdep_assert_held(&block
->cb_lock
);
1500 list_for_each_entry(block_cb
, &bo
->cb_list
, list
) {
1501 err
= tcf_block_playback_offloads(block
, block_cb
->cb
,
1502 block_cb
->cb_priv
, true,
1503 tcf_block_offload_in_use(block
),
1507 if (!bo
->unlocked_driver_cb
)
1508 block
->lockeddevcnt
++;
1512 list_splice(&bo
->cb_list
, &block
->flow_block
.cb_list
);
1517 list_for_each_entry_safe(block_cb
, next
, &bo
->cb_list
, list
) {
1519 list_del(&block_cb
->list
);
1520 tcf_block_playback_offloads(block
, block_cb
->cb
,
1521 block_cb
->cb_priv
, false,
1522 tcf_block_offload_in_use(block
),
1524 if (!bo
->unlocked_driver_cb
)
1525 block
->lockeddevcnt
--;
1527 flow_block_cb_free(block_cb
);
1533 static void tcf_block_unbind(struct tcf_block
*block
,
1534 struct flow_block_offload
*bo
)
1536 struct flow_block_cb
*block_cb
, *next
;
1538 lockdep_assert_held(&block
->cb_lock
);
1540 list_for_each_entry_safe(block_cb
, next
, &bo
->cb_list
, list
) {
1541 tcf_block_playback_offloads(block
, block_cb
->cb
,
1542 block_cb
->cb_priv
, false,
1543 tcf_block_offload_in_use(block
),
1545 list_del(&block_cb
->list
);
1546 flow_block_cb_free(block_cb
);
1547 if (!bo
->unlocked_driver_cb
)
1548 block
->lockeddevcnt
--;
1552 static int tcf_block_setup(struct tcf_block
*block
,
1553 struct flow_block_offload
*bo
)
1557 switch (bo
->command
) {
1558 case FLOW_BLOCK_BIND
:
1559 err
= tcf_block_bind(block
, bo
);
1561 case FLOW_BLOCK_UNBIND
:
1563 tcf_block_unbind(block
, bo
);
1573 /* Main classifier routine: scans classifier chain attached
1574 * to this qdisc, (optionally) tests for protocol and asks
1575 * specific classifiers.
1577 static inline int __tcf_classify(struct sk_buff
*skb
,
1578 const struct tcf_proto
*tp
,
1579 const struct tcf_proto
*orig_tp
,
1580 struct tcf_result
*res
,
1582 u32
*last_executed_chain
)
1584 #ifdef CONFIG_NET_CLS_ACT
1585 const int max_reclassify_loop
= 4;
1586 const struct tcf_proto
*first_tp
;
1591 for (; tp
; tp
= rcu_dereference_bh(tp
->next
)) {
1592 __be16 protocol
= tc_skb_protocol(skb
);
1595 if (tp
->protocol
!= protocol
&&
1596 tp
->protocol
!= htons(ETH_P_ALL
))
1599 err
= tp
->classify(skb
, tp
, res
);
1600 #ifdef CONFIG_NET_CLS_ACT
1601 if (unlikely(err
== TC_ACT_RECLASSIFY
&& !compat_mode
)) {
1603 *last_executed_chain
= first_tp
->chain
->index
;
1605 } else if (unlikely(TC_ACT_EXT_CMP(err
, TC_ACT_GOTO_CHAIN
))) {
1606 first_tp
= res
->goto_tp
;
1607 *last_executed_chain
= err
& TC_ACT_EXT_VAL_MASK
;
1615 return TC_ACT_UNSPEC
; /* signal: continue lookup */
1616 #ifdef CONFIG_NET_CLS_ACT
1618 if (unlikely(limit
++ >= max_reclassify_loop
)) {
1619 net_notice_ratelimited("%u: reclassify loop, rule prio %u, protocol %02x\n",
1620 tp
->chain
->block
->index
,
1622 ntohs(tp
->protocol
));
1631 int tcf_classify(struct sk_buff
*skb
, const struct tcf_proto
*tp
,
1632 struct tcf_result
*res
, bool compat_mode
)
1634 u32 last_executed_chain
= 0;
1636 return __tcf_classify(skb
, tp
, tp
, res
, compat_mode
,
1637 &last_executed_chain
);
1639 EXPORT_SYMBOL(tcf_classify
);
1641 int tcf_classify_ingress(struct sk_buff
*skb
,
1642 const struct tcf_block
*ingress_block
,
1643 const struct tcf_proto
*tp
,
1644 struct tcf_result
*res
, bool compat_mode
)
1646 #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
1647 u32 last_executed_chain
= 0;
1649 return __tcf_classify(skb
, tp
, tp
, res
, compat_mode
,
1650 &last_executed_chain
);
1652 u32 last_executed_chain
= tp
? tp
->chain
->index
: 0;
1653 const struct tcf_proto
*orig_tp
= tp
;
1654 struct tc_skb_ext
*ext
;
1657 ext
= skb_ext_find(skb
, TC_SKB_EXT
);
1659 if (ext
&& ext
->chain
) {
1660 struct tcf_chain
*fchain
;
1662 fchain
= tcf_chain_lookup_rcu(ingress_block
, ext
->chain
);
1666 /* Consume, so cloned/redirect skbs won't inherit ext */
1667 skb_ext_del(skb
, TC_SKB_EXT
);
1669 tp
= rcu_dereference_bh(fchain
->filter_chain
);
1670 last_executed_chain
= fchain
->index
;
1673 ret
= __tcf_classify(skb
, tp
, orig_tp
, res
, compat_mode
,
1674 &last_executed_chain
);
1676 /* If we missed on some chain */
1677 if (ret
== TC_ACT_UNSPEC
&& last_executed_chain
) {
1678 ext
= skb_ext_add(skb
, TC_SKB_EXT
);
1679 if (WARN_ON_ONCE(!ext
))
1681 ext
->chain
= last_executed_chain
;
1687 EXPORT_SYMBOL(tcf_classify_ingress
);
1689 struct tcf_chain_info
{
1690 struct tcf_proto __rcu
**pprev
;
1691 struct tcf_proto __rcu
*next
;
1694 static struct tcf_proto
*tcf_chain_tp_prev(struct tcf_chain
*chain
,
1695 struct tcf_chain_info
*chain_info
)
1697 return tcf_chain_dereference(*chain_info
->pprev
, chain
);
1700 static int tcf_chain_tp_insert(struct tcf_chain
*chain
,
1701 struct tcf_chain_info
*chain_info
,
1702 struct tcf_proto
*tp
)
1704 if (chain
->flushing
)
1707 if (*chain_info
->pprev
== chain
->filter_chain
)
1708 tcf_chain0_head_change(chain
, tp
);
1710 RCU_INIT_POINTER(tp
->next
, tcf_chain_tp_prev(chain
, chain_info
));
1711 rcu_assign_pointer(*chain_info
->pprev
, tp
);
1716 static void tcf_chain_tp_remove(struct tcf_chain
*chain
,
1717 struct tcf_chain_info
*chain_info
,
1718 struct tcf_proto
*tp
)
1720 struct tcf_proto
*next
= tcf_chain_dereference(chain_info
->next
, chain
);
1722 tcf_proto_mark_delete(tp
);
1723 if (tp
== chain
->filter_chain
)
1724 tcf_chain0_head_change(chain
, next
);
1725 RCU_INIT_POINTER(*chain_info
->pprev
, next
);
1728 static struct tcf_proto
*tcf_chain_tp_find(struct tcf_chain
*chain
,
1729 struct tcf_chain_info
*chain_info
,
1730 u32 protocol
, u32 prio
,
1731 bool prio_allocate
);
1733 /* Try to insert new proto.
1734 * If proto with specified priority already exists, free new proto
1735 * and return existing one.
1738 static struct tcf_proto
*tcf_chain_tp_insert_unique(struct tcf_chain
*chain
,
1739 struct tcf_proto
*tp_new
,
1740 u32 protocol
, u32 prio
,
1743 struct tcf_chain_info chain_info
;
1744 struct tcf_proto
*tp
;
1747 mutex_lock(&chain
->filter_chain_lock
);
1749 if (tcf_proto_exists_destroying(chain
, tp_new
)) {
1750 mutex_unlock(&chain
->filter_chain_lock
);
1751 tcf_proto_destroy(tp_new
, rtnl_held
, false, NULL
);
1752 return ERR_PTR(-EAGAIN
);
1755 tp
= tcf_chain_tp_find(chain
, &chain_info
,
1756 protocol
, prio
, false);
1758 err
= tcf_chain_tp_insert(chain
, &chain_info
, tp_new
);
1759 mutex_unlock(&chain
->filter_chain_lock
);
1762 tcf_proto_destroy(tp_new
, rtnl_held
, false, NULL
);
1765 tcf_proto_destroy(tp_new
, rtnl_held
, false, NULL
);
1766 tp_new
= ERR_PTR(err
);
1772 static void tcf_chain_tp_delete_empty(struct tcf_chain
*chain
,
1773 struct tcf_proto
*tp
, bool rtnl_held
,
1774 struct netlink_ext_ack
*extack
)
1776 struct tcf_chain_info chain_info
;
1777 struct tcf_proto
*tp_iter
;
1778 struct tcf_proto
**pprev
;
1779 struct tcf_proto
*next
;
1781 mutex_lock(&chain
->filter_chain_lock
);
1783 /* Atomically find and remove tp from chain. */
1784 for (pprev
= &chain
->filter_chain
;
1785 (tp_iter
= tcf_chain_dereference(*pprev
, chain
));
1786 pprev
= &tp_iter
->next
) {
1787 if (tp_iter
== tp
) {
1788 chain_info
.pprev
= pprev
;
1789 chain_info
.next
= tp_iter
->next
;
1790 WARN_ON(tp_iter
->deleting
);
1794 /* Verify that tp still exists and no new filters were inserted
1796 * Mark tp for deletion if it is empty.
1798 if (!tp_iter
|| !tcf_proto_check_delete(tp
)) {
1799 mutex_unlock(&chain
->filter_chain_lock
);
1803 tcf_proto_signal_destroying(chain
, tp
);
1804 next
= tcf_chain_dereference(chain_info
.next
, chain
);
1805 if (tp
== chain
->filter_chain
)
1806 tcf_chain0_head_change(chain
, next
);
1807 RCU_INIT_POINTER(*chain_info
.pprev
, next
);
1808 mutex_unlock(&chain
->filter_chain_lock
);
1810 tcf_proto_put(tp
, rtnl_held
, extack
);
1813 static struct tcf_proto
*tcf_chain_tp_find(struct tcf_chain
*chain
,
1814 struct tcf_chain_info
*chain_info
,
1815 u32 protocol
, u32 prio
,
1818 struct tcf_proto
**pprev
;
1819 struct tcf_proto
*tp
;
1821 /* Check the chain for existence of proto-tcf with this priority */
1822 for (pprev
= &chain
->filter_chain
;
1823 (tp
= tcf_chain_dereference(*pprev
, chain
));
1824 pprev
= &tp
->next
) {
1825 if (tp
->prio
>= prio
) {
1826 if (tp
->prio
== prio
) {
1827 if (prio_allocate
||
1828 (tp
->protocol
!= protocol
&& protocol
))
1829 return ERR_PTR(-EINVAL
);
1836 chain_info
->pprev
= pprev
;
1838 chain_info
->next
= tp
->next
;
1841 chain_info
->next
= NULL
;
1846 static int tcf_fill_node(struct net
*net
, struct sk_buff
*skb
,
1847 struct tcf_proto
*tp
, struct tcf_block
*block
,
1848 struct Qdisc
*q
, u32 parent
, void *fh
,
1849 u32 portid
, u32 seq
, u16 flags
, int event
,
1853 struct nlmsghdr
*nlh
;
1854 unsigned char *b
= skb_tail_pointer(skb
);
1856 nlh
= nlmsg_put(skb
, portid
, seq
, event
, sizeof(*tcm
), flags
);
1858 goto out_nlmsg_trim
;
1859 tcm
= nlmsg_data(nlh
);
1860 tcm
->tcm_family
= AF_UNSPEC
;
1864 tcm
->tcm_ifindex
= qdisc_dev(q
)->ifindex
;
1865 tcm
->tcm_parent
= parent
;
1867 tcm
->tcm_ifindex
= TCM_IFINDEX_MAGIC_BLOCK
;
1868 tcm
->tcm_block_index
= block
->index
;
1870 tcm
->tcm_info
= TC_H_MAKE(tp
->prio
, tp
->protocol
);
1871 if (nla_put_string(skb
, TCA_KIND
, tp
->ops
->kind
))
1872 goto nla_put_failure
;
1873 if (nla_put_u32(skb
, TCA_CHAIN
, tp
->chain
->index
))
1874 goto nla_put_failure
;
1876 tcm
->tcm_handle
= 0;
1878 if (tp
->ops
->dump
&&
1879 tp
->ops
->dump(net
, tp
, fh
, skb
, tcm
, rtnl_held
) < 0)
1880 goto nla_put_failure
;
1882 nlh
->nlmsg_len
= skb_tail_pointer(skb
) - b
;
1891 static int tfilter_notify(struct net
*net
, struct sk_buff
*oskb
,
1892 struct nlmsghdr
*n
, struct tcf_proto
*tp
,
1893 struct tcf_block
*block
, struct Qdisc
*q
,
1894 u32 parent
, void *fh
, int event
, bool unicast
,
1897 struct sk_buff
*skb
;
1898 u32 portid
= oskb
? NETLINK_CB(oskb
).portid
: 0;
1901 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
1905 if (tcf_fill_node(net
, skb
, tp
, block
, q
, parent
, fh
, portid
,
1906 n
->nlmsg_seq
, n
->nlmsg_flags
, event
,
1913 err
= netlink_unicast(net
->rtnl
, skb
, portid
, MSG_DONTWAIT
);
1915 err
= rtnetlink_send(skb
, net
, portid
, RTNLGRP_TC
,
1916 n
->nlmsg_flags
& NLM_F_ECHO
);
1923 static int tfilter_del_notify(struct net
*net
, struct sk_buff
*oskb
,
1924 struct nlmsghdr
*n
, struct tcf_proto
*tp
,
1925 struct tcf_block
*block
, struct Qdisc
*q
,
1926 u32 parent
, void *fh
, bool unicast
, bool *last
,
1927 bool rtnl_held
, struct netlink_ext_ack
*extack
)
1929 struct sk_buff
*skb
;
1930 u32 portid
= oskb
? NETLINK_CB(oskb
).portid
: 0;
1933 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
1937 if (tcf_fill_node(net
, skb
, tp
, block
, q
, parent
, fh
, portid
,
1938 n
->nlmsg_seq
, n
->nlmsg_flags
, RTM_DELTFILTER
,
1940 NL_SET_ERR_MSG(extack
, "Failed to build del event notification");
1945 err
= tp
->ops
->delete(tp
, fh
, last
, rtnl_held
, extack
);
1952 err
= netlink_unicast(net
->rtnl
, skb
, portid
, MSG_DONTWAIT
);
1954 err
= rtnetlink_send(skb
, net
, portid
, RTNLGRP_TC
,
1955 n
->nlmsg_flags
& NLM_F_ECHO
);
1957 NL_SET_ERR_MSG(extack
, "Failed to send filter delete notification");
1964 static void tfilter_notify_chain(struct net
*net
, struct sk_buff
*oskb
,
1965 struct tcf_block
*block
, struct Qdisc
*q
,
1966 u32 parent
, struct nlmsghdr
*n
,
1967 struct tcf_chain
*chain
, int event
,
1970 struct tcf_proto
*tp
;
1972 for (tp
= tcf_get_next_proto(chain
, NULL
, rtnl_held
);
1973 tp
; tp
= tcf_get_next_proto(chain
, tp
, rtnl_held
))
1974 tfilter_notify(net
, oskb
, n
, tp
, block
,
1975 q
, parent
, NULL
, event
, false, rtnl_held
);
1978 static void tfilter_put(struct tcf_proto
*tp
, void *fh
)
1980 if (tp
->ops
->put
&& fh
)
1981 tp
->ops
->put(tp
, fh
);
1984 static int tc_new_tfilter(struct sk_buff
*skb
, struct nlmsghdr
*n
,
1985 struct netlink_ext_ack
*extack
)
1987 struct net
*net
= sock_net(skb
->sk
);
1988 struct nlattr
*tca
[TCA_MAX
+ 1];
1989 char name
[IFNAMSIZ
];
1996 struct Qdisc
*q
= NULL
;
1997 struct tcf_chain_info chain_info
;
1998 struct tcf_chain
*chain
= NULL
;
1999 struct tcf_block
*block
;
2000 struct tcf_proto
*tp
;
2005 bool rtnl_held
= false;
2007 if (!netlink_ns_capable(skb
, net
->user_ns
, CAP_NET_ADMIN
))
2013 err
= nlmsg_parse_deprecated(n
, sizeof(*t
), tca
, TCA_MAX
,
2014 rtm_tca_policy
, extack
);
2019 protocol
= TC_H_MIN(t
->tcm_info
);
2020 prio
= TC_H_MAJ(t
->tcm_info
);
2021 prio_allocate
= false;
2022 parent
= t
->tcm_parent
;
2028 /* If no priority is provided by the user,
2031 if (n
->nlmsg_flags
& NLM_F_CREATE
) {
2032 prio
= TC_H_MAKE(0x80000000U
, 0U);
2033 prio_allocate
= true;
2035 NL_SET_ERR_MSG(extack
, "Invalid filter command with priority of zero");
2040 /* Find head of filter chain. */
2042 err
= __tcf_qdisc_find(net
, &q
, &parent
, t
->tcm_ifindex
, false, extack
);
2046 if (tcf_proto_check_kind(tca
[TCA_KIND
], name
)) {
2047 NL_SET_ERR_MSG(extack
, "Specified TC filter name too long");
2052 /* Take rtnl mutex if rtnl_held was set to true on previous iteration,
2053 * block is shared (no qdisc found), qdisc is not unlocked, classifier
2054 * type is not specified, classifier is not unlocked.
2057 (q
&& !(q
->ops
->cl_ops
->flags
& QDISC_CLASS_OPS_DOIT_UNLOCKED
)) ||
2058 !tcf_proto_is_unlocked(name
)) {
2063 err
= __tcf_qdisc_cl_find(q
, parent
, &cl
, t
->tcm_ifindex
, extack
);
2067 block
= __tcf_block_find(net
, q
, cl
, t
->tcm_ifindex
, t
->tcm_block_index
,
2069 if (IS_ERR(block
)) {
2070 err
= PTR_ERR(block
);
2073 block
->classid
= parent
;
2075 chain_index
= tca
[TCA_CHAIN
] ? nla_get_u32(tca
[TCA_CHAIN
]) : 0;
2076 if (chain_index
> TC_ACT_EXT_VAL_MASK
) {
2077 NL_SET_ERR_MSG(extack
, "Specified chain index exceeds upper limit");
2081 chain
= tcf_chain_get(block
, chain_index
, true);
2083 NL_SET_ERR_MSG(extack
, "Cannot create specified filter chain");
2088 mutex_lock(&chain
->filter_chain_lock
);
2089 tp
= tcf_chain_tp_find(chain
, &chain_info
, protocol
,
2090 prio
, prio_allocate
);
2092 NL_SET_ERR_MSG(extack
, "Filter with specified priority/protocol not found");
2098 struct tcf_proto
*tp_new
= NULL
;
2100 if (chain
->flushing
) {
2105 /* Proto-tcf does not exist, create new one */
2107 if (tca
[TCA_KIND
] == NULL
|| !protocol
) {
2108 NL_SET_ERR_MSG(extack
, "Filter kind and protocol must be specified");
2113 if (!(n
->nlmsg_flags
& NLM_F_CREATE
)) {
2114 NL_SET_ERR_MSG(extack
, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
2120 prio
= tcf_auto_prio(tcf_chain_tp_prev(chain
,
2123 mutex_unlock(&chain
->filter_chain_lock
);
2124 tp_new
= tcf_proto_create(name
, protocol
, prio
, chain
,
2126 if (IS_ERR(tp_new
)) {
2127 err
= PTR_ERR(tp_new
);
2132 tp
= tcf_chain_tp_insert_unique(chain
, tp_new
, protocol
, prio
,
2139 mutex_unlock(&chain
->filter_chain_lock
);
2142 if (tca
[TCA_KIND
] && nla_strcmp(tca
[TCA_KIND
], tp
->ops
->kind
)) {
2143 NL_SET_ERR_MSG(extack
, "Specified filter kind does not match existing one");
2148 fh
= tp
->ops
->get(tp
, t
->tcm_handle
);
2151 if (!(n
->nlmsg_flags
& NLM_F_CREATE
)) {
2152 NL_SET_ERR_MSG(extack
, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
2156 } else if (n
->nlmsg_flags
& NLM_F_EXCL
) {
2157 tfilter_put(tp
, fh
);
2158 NL_SET_ERR_MSG(extack
, "Filter already exists");
2163 if (chain
->tmplt_ops
&& chain
->tmplt_ops
!= tp
->ops
) {
2164 NL_SET_ERR_MSG(extack
, "Chain template is set to a different filter kind");
2169 err
= tp
->ops
->change(net
, skb
, tp
, cl
, t
->tcm_handle
, tca
, &fh
,
2170 n
->nlmsg_flags
& NLM_F_CREATE
? TCA_ACT_NOREPLACE
: TCA_ACT_REPLACE
,
2173 tfilter_notify(net
, skb
, n
, tp
, block
, q
, parent
, fh
,
2174 RTM_NEWTFILTER
, false, rtnl_held
);
2175 tfilter_put(tp
, fh
);
2176 /* q pointer is NULL for shared blocks */
2178 q
->flags
&= ~TCQ_F_CAN_BYPASS
;
2182 if (err
&& tp_created
)
2183 tcf_chain_tp_delete_empty(chain
, tp
, rtnl_held
, NULL
);
2186 if (tp
&& !IS_ERR(tp
))
2187 tcf_proto_put(tp
, rtnl_held
, NULL
);
2189 tcf_chain_put(chain
);
2191 tcf_block_release(q
, block
, rtnl_held
);
2196 if (err
== -EAGAIN
) {
2197 /* Take rtnl lock in case EAGAIN is caused by concurrent flush
2201 /* Replay the request. */
2207 mutex_unlock(&chain
->filter_chain_lock
);
2211 static int tc_del_tfilter(struct sk_buff
*skb
, struct nlmsghdr
*n
,
2212 struct netlink_ext_ack
*extack
)
2214 struct net
*net
= sock_net(skb
->sk
);
2215 struct nlattr
*tca
[TCA_MAX
+ 1];
2216 char name
[IFNAMSIZ
];
2222 struct Qdisc
*q
= NULL
;
2223 struct tcf_chain_info chain_info
;
2224 struct tcf_chain
*chain
= NULL
;
2225 struct tcf_block
*block
= NULL
;
2226 struct tcf_proto
*tp
= NULL
;
2227 unsigned long cl
= 0;
2230 bool rtnl_held
= false;
2232 if (!netlink_ns_capable(skb
, net
->user_ns
, CAP_NET_ADMIN
))
2235 err
= nlmsg_parse_deprecated(n
, sizeof(*t
), tca
, TCA_MAX
,
2236 rtm_tca_policy
, extack
);
2241 protocol
= TC_H_MIN(t
->tcm_info
);
2242 prio
= TC_H_MAJ(t
->tcm_info
);
2243 parent
= t
->tcm_parent
;
2245 if (prio
== 0 && (protocol
|| t
->tcm_handle
|| tca
[TCA_KIND
])) {
2246 NL_SET_ERR_MSG(extack
, "Cannot flush filters with protocol, handle or kind set");
2250 /* Find head of filter chain. */
2252 err
= __tcf_qdisc_find(net
, &q
, &parent
, t
->tcm_ifindex
, false, extack
);
2256 if (tcf_proto_check_kind(tca
[TCA_KIND
], name
)) {
2257 NL_SET_ERR_MSG(extack
, "Specified TC filter name too long");
2261 /* Take rtnl mutex if flushing whole chain, block is shared (no qdisc
2262 * found), qdisc is not unlocked, classifier type is not specified,
2263 * classifier is not unlocked.
2266 (q
&& !(q
->ops
->cl_ops
->flags
& QDISC_CLASS_OPS_DOIT_UNLOCKED
)) ||
2267 !tcf_proto_is_unlocked(name
)) {
2272 err
= __tcf_qdisc_cl_find(q
, parent
, &cl
, t
->tcm_ifindex
, extack
);
2276 block
= __tcf_block_find(net
, q
, cl
, t
->tcm_ifindex
, t
->tcm_block_index
,
2278 if (IS_ERR(block
)) {
2279 err
= PTR_ERR(block
);
2283 chain_index
= tca
[TCA_CHAIN
] ? nla_get_u32(tca
[TCA_CHAIN
]) : 0;
2284 if (chain_index
> TC_ACT_EXT_VAL_MASK
) {
2285 NL_SET_ERR_MSG(extack
, "Specified chain index exceeds upper limit");
2289 chain
= tcf_chain_get(block
, chain_index
, false);
2291 /* User requested flush on non-existent chain. Nothing to do,
2292 * so just return success.
2298 NL_SET_ERR_MSG(extack
, "Cannot find specified filter chain");
2304 tfilter_notify_chain(net
, skb
, block
, q
, parent
, n
,
2305 chain
, RTM_DELTFILTER
, rtnl_held
);
2306 tcf_chain_flush(chain
, rtnl_held
);
2311 mutex_lock(&chain
->filter_chain_lock
);
2312 tp
= tcf_chain_tp_find(chain
, &chain_info
, protocol
,
2314 if (!tp
|| IS_ERR(tp
)) {
2315 NL_SET_ERR_MSG(extack
, "Filter with specified priority/protocol not found");
2316 err
= tp
? PTR_ERR(tp
) : -ENOENT
;
2318 } else if (tca
[TCA_KIND
] && nla_strcmp(tca
[TCA_KIND
], tp
->ops
->kind
)) {
2319 NL_SET_ERR_MSG(extack
, "Specified filter kind does not match existing one");
2322 } else if (t
->tcm_handle
== 0) {
2323 tcf_proto_signal_destroying(chain
, tp
);
2324 tcf_chain_tp_remove(chain
, &chain_info
, tp
);
2325 mutex_unlock(&chain
->filter_chain_lock
);
2327 tcf_proto_put(tp
, rtnl_held
, NULL
);
2328 tfilter_notify(net
, skb
, n
, tp
, block
, q
, parent
, fh
,
2329 RTM_DELTFILTER
, false, rtnl_held
);
2333 mutex_unlock(&chain
->filter_chain_lock
);
2335 fh
= tp
->ops
->get(tp
, t
->tcm_handle
);
2338 NL_SET_ERR_MSG(extack
, "Specified filter handle not found");
2343 err
= tfilter_del_notify(net
, skb
, n
, tp
, block
,
2344 q
, parent
, fh
, false, &last
,
2350 tcf_chain_tp_delete_empty(chain
, tp
, rtnl_held
, extack
);
2355 if (tp
&& !IS_ERR(tp
))
2356 tcf_proto_put(tp
, rtnl_held
, NULL
);
2357 tcf_chain_put(chain
);
2359 tcf_block_release(q
, block
, rtnl_held
);
2367 mutex_unlock(&chain
->filter_chain_lock
);
2371 static int tc_get_tfilter(struct sk_buff
*skb
, struct nlmsghdr
*n
,
2372 struct netlink_ext_ack
*extack
)
2374 struct net
*net
= sock_net(skb
->sk
);
2375 struct nlattr
*tca
[TCA_MAX
+ 1];
2376 char name
[IFNAMSIZ
];
2382 struct Qdisc
*q
= NULL
;
2383 struct tcf_chain_info chain_info
;
2384 struct tcf_chain
*chain
= NULL
;
2385 struct tcf_block
*block
= NULL
;
2386 struct tcf_proto
*tp
= NULL
;
2387 unsigned long cl
= 0;
2390 bool rtnl_held
= false;
2392 err
= nlmsg_parse_deprecated(n
, sizeof(*t
), tca
, TCA_MAX
,
2393 rtm_tca_policy
, extack
);
2398 protocol
= TC_H_MIN(t
->tcm_info
);
2399 prio
= TC_H_MAJ(t
->tcm_info
);
2400 parent
= t
->tcm_parent
;
2403 NL_SET_ERR_MSG(extack
, "Invalid filter command with priority of zero");
2407 /* Find head of filter chain. */
2409 err
= __tcf_qdisc_find(net
, &q
, &parent
, t
->tcm_ifindex
, false, extack
);
2413 if (tcf_proto_check_kind(tca
[TCA_KIND
], name
)) {
2414 NL_SET_ERR_MSG(extack
, "Specified TC filter name too long");
2418 /* Take rtnl mutex if block is shared (no qdisc found), qdisc is not
2419 * unlocked, classifier type is not specified, classifier is not
2422 if ((q
&& !(q
->ops
->cl_ops
->flags
& QDISC_CLASS_OPS_DOIT_UNLOCKED
)) ||
2423 !tcf_proto_is_unlocked(name
)) {
2428 err
= __tcf_qdisc_cl_find(q
, parent
, &cl
, t
->tcm_ifindex
, extack
);
2432 block
= __tcf_block_find(net
, q
, cl
, t
->tcm_ifindex
, t
->tcm_block_index
,
2434 if (IS_ERR(block
)) {
2435 err
= PTR_ERR(block
);
2439 chain_index
= tca
[TCA_CHAIN
] ? nla_get_u32(tca
[TCA_CHAIN
]) : 0;
2440 if (chain_index
> TC_ACT_EXT_VAL_MASK
) {
2441 NL_SET_ERR_MSG(extack
, "Specified chain index exceeds upper limit");
2445 chain
= tcf_chain_get(block
, chain_index
, false);
2447 NL_SET_ERR_MSG(extack
, "Cannot find specified filter chain");
2452 mutex_lock(&chain
->filter_chain_lock
);
2453 tp
= tcf_chain_tp_find(chain
, &chain_info
, protocol
,
2455 mutex_unlock(&chain
->filter_chain_lock
);
2456 if (!tp
|| IS_ERR(tp
)) {
2457 NL_SET_ERR_MSG(extack
, "Filter with specified priority/protocol not found");
2458 err
= tp
? PTR_ERR(tp
) : -ENOENT
;
2460 } else if (tca
[TCA_KIND
] && nla_strcmp(tca
[TCA_KIND
], tp
->ops
->kind
)) {
2461 NL_SET_ERR_MSG(extack
, "Specified filter kind does not match existing one");
2466 fh
= tp
->ops
->get(tp
, t
->tcm_handle
);
2469 NL_SET_ERR_MSG(extack
, "Specified filter handle not found");
2472 err
= tfilter_notify(net
, skb
, n
, tp
, block
, q
, parent
,
2473 fh
, RTM_NEWTFILTER
, true, rtnl_held
);
2475 NL_SET_ERR_MSG(extack
, "Failed to send filter notify message");
2478 tfilter_put(tp
, fh
);
2481 if (tp
&& !IS_ERR(tp
))
2482 tcf_proto_put(tp
, rtnl_held
, NULL
);
2483 tcf_chain_put(chain
);
2485 tcf_block_release(q
, block
, rtnl_held
);
2493 struct tcf_dump_args
{
2494 struct tcf_walker w
;
2495 struct sk_buff
*skb
;
2496 struct netlink_callback
*cb
;
2497 struct tcf_block
*block
;
2502 static int tcf_node_dump(struct tcf_proto
*tp
, void *n
, struct tcf_walker
*arg
)
2504 struct tcf_dump_args
*a
= (void *)arg
;
2505 struct net
*net
= sock_net(a
->skb
->sk
);
2507 return tcf_fill_node(net
, a
->skb
, tp
, a
->block
, a
->q
, a
->parent
,
2508 n
, NETLINK_CB(a
->cb
->skb
).portid
,
2509 a
->cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
,
2510 RTM_NEWTFILTER
, true);
2513 static bool tcf_chain_dump(struct tcf_chain
*chain
, struct Qdisc
*q
, u32 parent
,
2514 struct sk_buff
*skb
, struct netlink_callback
*cb
,
2515 long index_start
, long *p_index
)
2517 struct net
*net
= sock_net(skb
->sk
);
2518 struct tcf_block
*block
= chain
->block
;
2519 struct tcmsg
*tcm
= nlmsg_data(cb
->nlh
);
2520 struct tcf_proto
*tp
, *tp_prev
;
2521 struct tcf_dump_args arg
;
2523 for (tp
= __tcf_get_next_proto(chain
, NULL
);
2526 tp
= __tcf_get_next_proto(chain
, tp
),
2527 tcf_proto_put(tp_prev
, true, NULL
),
2529 if (*p_index
< index_start
)
2531 if (TC_H_MAJ(tcm
->tcm_info
) &&
2532 TC_H_MAJ(tcm
->tcm_info
) != tp
->prio
)
2534 if (TC_H_MIN(tcm
->tcm_info
) &&
2535 TC_H_MIN(tcm
->tcm_info
) != tp
->protocol
)
2537 if (*p_index
> index_start
)
2538 memset(&cb
->args
[1], 0,
2539 sizeof(cb
->args
) - sizeof(cb
->args
[0]));
2540 if (cb
->args
[1] == 0) {
2541 if (tcf_fill_node(net
, skb
, tp
, block
, q
, parent
, NULL
,
2542 NETLINK_CB(cb
->skb
).portid
,
2543 cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
,
2544 RTM_NEWTFILTER
, true) <= 0)
2550 arg
.w
.fn
= tcf_node_dump
;
2555 arg
.parent
= parent
;
2557 arg
.w
.skip
= cb
->args
[1] - 1;
2559 arg
.w
.cookie
= cb
->args
[2];
2560 tp
->ops
->walk(tp
, &arg
.w
, true);
2561 cb
->args
[2] = arg
.w
.cookie
;
2562 cb
->args
[1] = arg
.w
.count
+ 1;
2569 tcf_proto_put(tp
, true, NULL
);
2573 /* called with RTNL */
2574 static int tc_dump_tfilter(struct sk_buff
*skb
, struct netlink_callback
*cb
)
2576 struct tcf_chain
*chain
, *chain_prev
;
2577 struct net
*net
= sock_net(skb
->sk
);
2578 struct nlattr
*tca
[TCA_MAX
+ 1];
2579 struct Qdisc
*q
= NULL
;
2580 struct tcf_block
*block
;
2581 struct tcmsg
*tcm
= nlmsg_data(cb
->nlh
);
2587 if (nlmsg_len(cb
->nlh
) < sizeof(*tcm
))
2590 err
= nlmsg_parse_deprecated(cb
->nlh
, sizeof(*tcm
), tca
, TCA_MAX
,
2595 if (tcm
->tcm_ifindex
== TCM_IFINDEX_MAGIC_BLOCK
) {
2596 block
= tcf_block_refcnt_get(net
, tcm
->tcm_block_index
);
2599 /* If we work with block index, q is NULL and parent value
2600 * will never be used in the following code. The check
2601 * in tcf_fill_node prevents it. However, compiler does not
2602 * see that far, so set parent to zero to silence the warning
2603 * about parent being uninitialized.
2607 const struct Qdisc_class_ops
*cops
;
2608 struct net_device
*dev
;
2609 unsigned long cl
= 0;
2611 dev
= __dev_get_by_index(net
, tcm
->tcm_ifindex
);
2615 parent
= tcm
->tcm_parent
;
2619 q
= qdisc_lookup(dev
, TC_H_MAJ(tcm
->tcm_parent
));
2622 cops
= q
->ops
->cl_ops
;
2625 if (!cops
->tcf_block
)
2627 if (TC_H_MIN(tcm
->tcm_parent
)) {
2628 cl
= cops
->find(q
, tcm
->tcm_parent
);
2632 block
= cops
->tcf_block(q
, cl
, NULL
);
2635 parent
= block
->classid
;
2636 if (tcf_block_shared(block
))
2640 index_start
= cb
->args
[0];
2643 for (chain
= __tcf_get_next_chain(block
, NULL
);
2646 chain
= __tcf_get_next_chain(block
, chain
),
2647 tcf_chain_put(chain_prev
)) {
2648 if (tca
[TCA_CHAIN
] &&
2649 nla_get_u32(tca
[TCA_CHAIN
]) != chain
->index
)
2651 if (!tcf_chain_dump(chain
, q
, parent
, skb
, cb
,
2652 index_start
, &index
)) {
2653 tcf_chain_put(chain
);
2659 if (tcm
->tcm_ifindex
== TCM_IFINDEX_MAGIC_BLOCK
)
2660 tcf_block_refcnt_put(block
, true);
2661 cb
->args
[0] = index
;
2664 /* If we did no progress, the error (EMSGSIZE) is real */
2665 if (skb
->len
== 0 && err
)
2670 static int tc_chain_fill_node(const struct tcf_proto_ops
*tmplt_ops
,
2671 void *tmplt_priv
, u32 chain_index
,
2672 struct net
*net
, struct sk_buff
*skb
,
2673 struct tcf_block
*block
,
2674 u32 portid
, u32 seq
, u16 flags
, int event
)
2676 unsigned char *b
= skb_tail_pointer(skb
);
2677 const struct tcf_proto_ops
*ops
;
2678 struct nlmsghdr
*nlh
;
2685 nlh
= nlmsg_put(skb
, portid
, seq
, event
, sizeof(*tcm
), flags
);
2687 goto out_nlmsg_trim
;
2688 tcm
= nlmsg_data(nlh
);
2689 tcm
->tcm_family
= AF_UNSPEC
;
2692 tcm
->tcm_handle
= 0;
2694 tcm
->tcm_ifindex
= qdisc_dev(block
->q
)->ifindex
;
2695 tcm
->tcm_parent
= block
->q
->handle
;
2697 tcm
->tcm_ifindex
= TCM_IFINDEX_MAGIC_BLOCK
;
2698 tcm
->tcm_block_index
= block
->index
;
2701 if (nla_put_u32(skb
, TCA_CHAIN
, chain_index
))
2702 goto nla_put_failure
;
2705 if (nla_put_string(skb
, TCA_KIND
, ops
->kind
))
2706 goto nla_put_failure
;
2707 if (ops
->tmplt_dump(skb
, net
, priv
) < 0)
2708 goto nla_put_failure
;
2711 nlh
->nlmsg_len
= skb_tail_pointer(skb
) - b
;
2720 static int tc_chain_notify(struct tcf_chain
*chain
, struct sk_buff
*oskb
,
2721 u32 seq
, u16 flags
, int event
, bool unicast
)
2723 u32 portid
= oskb
? NETLINK_CB(oskb
).portid
: 0;
2724 struct tcf_block
*block
= chain
->block
;
2725 struct net
*net
= block
->net
;
2726 struct sk_buff
*skb
;
2729 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
2733 if (tc_chain_fill_node(chain
->tmplt_ops
, chain
->tmplt_priv
,
2734 chain
->index
, net
, skb
, block
, portid
,
2735 seq
, flags
, event
) <= 0) {
2741 err
= netlink_unicast(net
->rtnl
, skb
, portid
, MSG_DONTWAIT
);
2743 err
= rtnetlink_send(skb
, net
, portid
, RTNLGRP_TC
,
2744 flags
& NLM_F_ECHO
);
2751 static int tc_chain_notify_delete(const struct tcf_proto_ops
*tmplt_ops
,
2752 void *tmplt_priv
, u32 chain_index
,
2753 struct tcf_block
*block
, struct sk_buff
*oskb
,
2754 u32 seq
, u16 flags
, bool unicast
)
2756 u32 portid
= oskb
? NETLINK_CB(oskb
).portid
: 0;
2757 struct net
*net
= block
->net
;
2758 struct sk_buff
*skb
;
2760 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
2764 if (tc_chain_fill_node(tmplt_ops
, tmplt_priv
, chain_index
, net
, skb
,
2765 block
, portid
, seq
, flags
, RTM_DELCHAIN
) <= 0) {
2771 return netlink_unicast(net
->rtnl
, skb
, portid
, MSG_DONTWAIT
);
2773 return rtnetlink_send(skb
, net
, portid
, RTNLGRP_TC
, flags
& NLM_F_ECHO
);
2776 static int tc_chain_tmplt_add(struct tcf_chain
*chain
, struct net
*net
,
2777 struct nlattr
**tca
,
2778 struct netlink_ext_ack
*extack
)
2780 const struct tcf_proto_ops
*ops
;
2781 char name
[IFNAMSIZ
];
2784 /* If kind is not set, user did not specify template. */
2788 if (tcf_proto_check_kind(tca
[TCA_KIND
], name
)) {
2789 NL_SET_ERR_MSG(extack
, "Specified TC chain template name too long");
2793 ops
= tcf_proto_lookup_ops(name
, true, extack
);
2795 return PTR_ERR(ops
);
2796 if (!ops
->tmplt_create
|| !ops
->tmplt_destroy
|| !ops
->tmplt_dump
) {
2797 NL_SET_ERR_MSG(extack
, "Chain templates are not supported with specified classifier");
2801 tmplt_priv
= ops
->tmplt_create(net
, chain
, tca
, extack
);
2802 if (IS_ERR(tmplt_priv
)) {
2803 module_put(ops
->owner
);
2804 return PTR_ERR(tmplt_priv
);
2806 chain
->tmplt_ops
= ops
;
2807 chain
->tmplt_priv
= tmplt_priv
;
2811 static void tc_chain_tmplt_del(const struct tcf_proto_ops
*tmplt_ops
,
2814 /* If template ops are set, no work to do for us. */
2818 tmplt_ops
->tmplt_destroy(tmplt_priv
);
2819 module_put(tmplt_ops
->owner
);
2822 /* Add/delete/get a chain */
2824 static int tc_ctl_chain(struct sk_buff
*skb
, struct nlmsghdr
*n
,
2825 struct netlink_ext_ack
*extack
)
2827 struct net
*net
= sock_net(skb
->sk
);
2828 struct nlattr
*tca
[TCA_MAX
+ 1];
2832 struct Qdisc
*q
= NULL
;
2833 struct tcf_chain
*chain
= NULL
;
2834 struct tcf_block
*block
;
2838 if (n
->nlmsg_type
!= RTM_GETCHAIN
&&
2839 !netlink_ns_capable(skb
, net
->user_ns
, CAP_NET_ADMIN
))
2843 err
= nlmsg_parse_deprecated(n
, sizeof(*t
), tca
, TCA_MAX
,
2844 rtm_tca_policy
, extack
);
2849 parent
= t
->tcm_parent
;
2852 block
= tcf_block_find(net
, &q
, &parent
, &cl
,
2853 t
->tcm_ifindex
, t
->tcm_block_index
, extack
);
2855 return PTR_ERR(block
);
2857 chain_index
= tca
[TCA_CHAIN
] ? nla_get_u32(tca
[TCA_CHAIN
]) : 0;
2858 if (chain_index
> TC_ACT_EXT_VAL_MASK
) {
2859 NL_SET_ERR_MSG(extack
, "Specified chain index exceeds upper limit");
2864 mutex_lock(&block
->lock
);
2865 chain
= tcf_chain_lookup(block
, chain_index
);
2866 if (n
->nlmsg_type
== RTM_NEWCHAIN
) {
2868 if (tcf_chain_held_by_acts_only(chain
)) {
2869 /* The chain exists only because there is
2870 * some action referencing it.
2872 tcf_chain_hold(chain
);
2874 NL_SET_ERR_MSG(extack
, "Filter chain already exists");
2876 goto errout_block_locked
;
2879 if (!(n
->nlmsg_flags
& NLM_F_CREATE
)) {
2880 NL_SET_ERR_MSG(extack
, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
2882 goto errout_block_locked
;
2884 chain
= tcf_chain_create(block
, chain_index
);
2886 NL_SET_ERR_MSG(extack
, "Failed to create filter chain");
2888 goto errout_block_locked
;
2892 if (!chain
|| tcf_chain_held_by_acts_only(chain
)) {
2893 NL_SET_ERR_MSG(extack
, "Cannot find specified filter chain");
2895 goto errout_block_locked
;
2897 tcf_chain_hold(chain
);
2900 if (n
->nlmsg_type
== RTM_NEWCHAIN
) {
2901 /* Modifying chain requires holding parent block lock. In case
2902 * the chain was successfully added, take a reference to the
2903 * chain. This ensures that an empty chain does not disappear at
2904 * the end of this function.
2906 tcf_chain_hold(chain
);
2907 chain
->explicitly_created
= true;
2909 mutex_unlock(&block
->lock
);
2911 switch (n
->nlmsg_type
) {
2913 err
= tc_chain_tmplt_add(chain
, net
, tca
, extack
);
2915 tcf_chain_put_explicitly_created(chain
);
2919 tc_chain_notify(chain
, NULL
, 0, NLM_F_CREATE
| NLM_F_EXCL
,
2920 RTM_NEWCHAIN
, false);
2923 tfilter_notify_chain(net
, skb
, block
, q
, parent
, n
,
2924 chain
, RTM_DELTFILTER
, true);
2925 /* Flush the chain first as the user requested chain removal. */
2926 tcf_chain_flush(chain
, true);
2927 /* In case the chain was successfully deleted, put a reference
2928 * to the chain previously taken during addition.
2930 tcf_chain_put_explicitly_created(chain
);
2933 err
= tc_chain_notify(chain
, skb
, n
->nlmsg_seq
,
2934 n
->nlmsg_seq
, n
->nlmsg_type
, true);
2936 NL_SET_ERR_MSG(extack
, "Failed to send chain notify message");
2940 NL_SET_ERR_MSG(extack
, "Unsupported message type");
2945 tcf_chain_put(chain
);
2947 tcf_block_release(q
, block
, true);
2949 /* Replay the request. */
2953 errout_block_locked
:
2954 mutex_unlock(&block
->lock
);
2958 /* called with RTNL */
2959 static int tc_dump_chain(struct sk_buff
*skb
, struct netlink_callback
*cb
)
2961 struct net
*net
= sock_net(skb
->sk
);
2962 struct nlattr
*tca
[TCA_MAX
+ 1];
2963 struct Qdisc
*q
= NULL
;
2964 struct tcf_block
*block
;
2965 struct tcmsg
*tcm
= nlmsg_data(cb
->nlh
);
2966 struct tcf_chain
*chain
;
2972 if (nlmsg_len(cb
->nlh
) < sizeof(*tcm
))
2975 err
= nlmsg_parse_deprecated(cb
->nlh
, sizeof(*tcm
), tca
, TCA_MAX
,
2976 rtm_tca_policy
, cb
->extack
);
2980 if (tcm
->tcm_ifindex
== TCM_IFINDEX_MAGIC_BLOCK
) {
2981 block
= tcf_block_refcnt_get(net
, tcm
->tcm_block_index
);
2984 /* If we work with block index, q is NULL and parent value
2985 * will never be used in the following code. The check
2986 * in tcf_fill_node prevents it. However, compiler does not
2987 * see that far, so set parent to zero to silence the warning
2988 * about parent being uninitialized.
2992 const struct Qdisc_class_ops
*cops
;
2993 struct net_device
*dev
;
2994 unsigned long cl
= 0;
2996 dev
= __dev_get_by_index(net
, tcm
->tcm_ifindex
);
3000 parent
= tcm
->tcm_parent
;
3005 q
= qdisc_lookup(dev
, TC_H_MAJ(tcm
->tcm_parent
));
3009 cops
= q
->ops
->cl_ops
;
3012 if (!cops
->tcf_block
)
3014 if (TC_H_MIN(tcm
->tcm_parent
)) {
3015 cl
= cops
->find(q
, tcm
->tcm_parent
);
3019 block
= cops
->tcf_block(q
, cl
, NULL
);
3022 if (tcf_block_shared(block
))
3026 index_start
= cb
->args
[0];
3029 mutex_lock(&block
->lock
);
3030 list_for_each_entry(chain
, &block
->chain_list
, list
) {
3031 if ((tca
[TCA_CHAIN
] &&
3032 nla_get_u32(tca
[TCA_CHAIN
]) != chain
->index
))
3034 if (index
< index_start
) {
3038 if (tcf_chain_held_by_acts_only(chain
))
3040 err
= tc_chain_fill_node(chain
->tmplt_ops
, chain
->tmplt_priv
,
3041 chain
->index
, net
, skb
, block
,
3042 NETLINK_CB(cb
->skb
).portid
,
3043 cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
,
3049 mutex_unlock(&block
->lock
);
3051 if (tcm
->tcm_ifindex
== TCM_IFINDEX_MAGIC_BLOCK
)
3052 tcf_block_refcnt_put(block
, true);
3053 cb
->args
[0] = index
;
3056 /* If we made no progress, the error (EMSGSIZE) is real */
3057 if (skb
->len
== 0 && err
)
3062 void tcf_exts_destroy(struct tcf_exts
*exts
)
3064 #ifdef CONFIG_NET_CLS_ACT
3065 if (exts
->actions
) {
3066 tcf_action_destroy(exts
->actions
, TCA_ACT_UNBIND
);
3067 kfree(exts
->actions
);
3069 exts
->nr_actions
= 0;
3072 EXPORT_SYMBOL(tcf_exts_destroy
);
3074 int tcf_exts_validate(struct net
*net
, struct tcf_proto
*tp
, struct nlattr
**tb
,
3075 struct nlattr
*rate_tlv
, struct tcf_exts
*exts
, bool ovr
,
3076 bool rtnl_held
, struct netlink_ext_ack
*extack
)
3078 #ifdef CONFIG_NET_CLS_ACT
3080 struct tc_action
*act
;
3081 size_t attr_size
= 0;
3083 if (exts
->police
&& tb
[exts
->police
]) {
3084 act
= tcf_action_init_1(net
, tp
, tb
[exts
->police
],
3085 rate_tlv
, "police", ovr
,
3086 TCA_ACT_BIND
, rtnl_held
,
3089 return PTR_ERR(act
);
3091 act
->type
= exts
->type
= TCA_OLD_COMPAT
;
3092 exts
->actions
[0] = act
;
3093 exts
->nr_actions
= 1;
3094 } else if (exts
->action
&& tb
[exts
->action
]) {
3097 err
= tcf_action_init(net
, tp
, tb
[exts
->action
],
3098 rate_tlv
, NULL
, ovr
, TCA_ACT_BIND
,
3099 exts
->actions
, &attr_size
,
3103 exts
->nr_actions
= err
;
3107 if ((exts
->action
&& tb
[exts
->action
]) ||
3108 (exts
->police
&& tb
[exts
->police
])) {
3109 NL_SET_ERR_MSG(extack
, "Classifier actions are not supported per compile options (CONFIG_NET_CLS_ACT)");
3116 EXPORT_SYMBOL(tcf_exts_validate
);
3118 void tcf_exts_change(struct tcf_exts
*dst
, struct tcf_exts
*src
)
3120 #ifdef CONFIG_NET_CLS_ACT
3121 struct tcf_exts old
= *dst
;
3124 tcf_exts_destroy(&old
);
3127 EXPORT_SYMBOL(tcf_exts_change
);
3129 #ifdef CONFIG_NET_CLS_ACT
3130 static struct tc_action
*tcf_exts_first_act(struct tcf_exts
*exts
)
3132 if (exts
->nr_actions
== 0)
3135 return exts
->actions
[0];
3139 int tcf_exts_dump(struct sk_buff
*skb
, struct tcf_exts
*exts
)
3141 #ifdef CONFIG_NET_CLS_ACT
3142 struct nlattr
*nest
;
3144 if (exts
->action
&& tcf_exts_has_actions(exts
)) {
3146 * again for backward compatible mode - we want
3147 * to work with both old and new modes of entering
3148 * tc data even if iproute2 was newer - jhs
3150 if (exts
->type
!= TCA_OLD_COMPAT
) {
3151 nest
= nla_nest_start_noflag(skb
, exts
->action
);
3153 goto nla_put_failure
;
3155 if (tcf_action_dump(skb
, exts
->actions
, 0, 0) < 0)
3156 goto nla_put_failure
;
3157 nla_nest_end(skb
, nest
);
3158 } else if (exts
->police
) {
3159 struct tc_action
*act
= tcf_exts_first_act(exts
);
3160 nest
= nla_nest_start_noflag(skb
, exts
->police
);
3161 if (nest
== NULL
|| !act
)
3162 goto nla_put_failure
;
3163 if (tcf_action_dump_old(skb
, act
, 0, 0) < 0)
3164 goto nla_put_failure
;
3165 nla_nest_end(skb
, nest
);
3171 nla_nest_cancel(skb
, nest
);
3177 EXPORT_SYMBOL(tcf_exts_dump
);
3180 int tcf_exts_dump_stats(struct sk_buff
*skb
, struct tcf_exts
*exts
)
3182 #ifdef CONFIG_NET_CLS_ACT
3183 struct tc_action
*a
= tcf_exts_first_act(exts
);
3184 if (a
!= NULL
&& tcf_action_copy_stats(skb
, a
, 1) < 0)
3189 EXPORT_SYMBOL(tcf_exts_dump_stats
);
3191 static void tcf_block_offload_inc(struct tcf_block
*block
, u32
*flags
)
3193 if (*flags
& TCA_CLS_FLAGS_IN_HW
)
3195 *flags
|= TCA_CLS_FLAGS_IN_HW
;
3196 atomic_inc(&block
->offloadcnt
);
3199 static void tcf_block_offload_dec(struct tcf_block
*block
, u32
*flags
)
3201 if (!(*flags
& TCA_CLS_FLAGS_IN_HW
))
3203 *flags
&= ~TCA_CLS_FLAGS_IN_HW
;
3204 atomic_dec(&block
->offloadcnt
);
3207 static void tc_cls_offload_cnt_update(struct tcf_block
*block
,
3208 struct tcf_proto
*tp
, u32
*cnt
,
3209 u32
*flags
, u32 diff
, bool add
)
3211 lockdep_assert_held(&block
->cb_lock
);
3213 spin_lock(&tp
->lock
);
3216 tcf_block_offload_inc(block
, flags
);
3221 tcf_block_offload_dec(block
, flags
);
3223 spin_unlock(&tp
->lock
);
3227 tc_cls_offload_cnt_reset(struct tcf_block
*block
, struct tcf_proto
*tp
,
3228 u32
*cnt
, u32
*flags
)
3230 lockdep_assert_held(&block
->cb_lock
);
3232 spin_lock(&tp
->lock
);
3233 tcf_block_offload_dec(block
, flags
);
3235 spin_unlock(&tp
->lock
);
3239 __tc_setup_cb_call(struct tcf_block
*block
, enum tc_setup_type type
,
3240 void *type_data
, bool err_stop
)
3242 struct flow_block_cb
*block_cb
;
3246 list_for_each_entry(block_cb
, &block
->flow_block
.cb_list
, list
) {
3247 err
= block_cb
->cb(type
, type_data
, block_cb
->cb_priv
);
3258 int tc_setup_cb_call(struct tcf_block
*block
, enum tc_setup_type type
,
3259 void *type_data
, bool err_stop
, bool rtnl_held
)
3261 bool take_rtnl
= READ_ONCE(block
->lockeddevcnt
) && !rtnl_held
;
3267 down_read(&block
->cb_lock
);
3268 /* Need to obtain rtnl lock if block is bound to devs that require it.
3269 * In block bind code cb_lock is obtained while holding rtnl, so we must
3270 * obtain the locks in same order here.
3272 if (!rtnl_held
&& !take_rtnl
&& block
->lockeddevcnt
) {
3273 up_read(&block
->cb_lock
);
3278 ok_count
= __tc_setup_cb_call(block
, type
, type_data
, err_stop
);
3280 up_read(&block
->cb_lock
);
3285 EXPORT_SYMBOL(tc_setup_cb_call
);
3287 /* Non-destructive filter add. If filter that wasn't already in hardware is
3288 * successfully offloaded, increment block offloads counter. On failure,
3289 * previously offloaded filter is considered to be intact and offloads counter
3290 * is not decremented.
3293 int tc_setup_cb_add(struct tcf_block
*block
, struct tcf_proto
*tp
,
3294 enum tc_setup_type type
, void *type_data
, bool err_stop
,
3295 u32
*flags
, unsigned int *in_hw_count
, bool rtnl_held
)
3297 bool take_rtnl
= READ_ONCE(block
->lockeddevcnt
) && !rtnl_held
;
3303 down_read(&block
->cb_lock
);
3304 /* Need to obtain rtnl lock if block is bound to devs that require it.
3305 * In block bind code cb_lock is obtained while holding rtnl, so we must
3306 * obtain the locks in same order here.
3308 if (!rtnl_held
&& !take_rtnl
&& block
->lockeddevcnt
) {
3309 up_read(&block
->cb_lock
);
3314 /* Make sure all netdevs sharing this block are offload-capable. */
3315 if (block
->nooffloaddevcnt
&& err_stop
) {
3316 ok_count
= -EOPNOTSUPP
;
3320 ok_count
= __tc_setup_cb_call(block
, type
, type_data
, err_stop
);
3324 if (tp
->ops
->hw_add
)
3325 tp
->ops
->hw_add(tp
, type_data
);
3327 tc_cls_offload_cnt_update(block
, tp
, in_hw_count
, flags
,
3330 up_read(&block
->cb_lock
);
3333 return ok_count
< 0 ? ok_count
: 0;
3335 EXPORT_SYMBOL(tc_setup_cb_add
);
3337 /* Destructive filter replace. If filter that wasn't already in hardware is
3338 * successfully offloaded, increment block offload counter. On failure,
3339 * previously offloaded filter is considered to be destroyed and offload counter
3343 int tc_setup_cb_replace(struct tcf_block
*block
, struct tcf_proto
*tp
,
3344 enum tc_setup_type type
, void *type_data
, bool err_stop
,
3345 u32
*old_flags
, unsigned int *old_in_hw_count
,
3346 u32
*new_flags
, unsigned int *new_in_hw_count
,
3349 bool take_rtnl
= READ_ONCE(block
->lockeddevcnt
) && !rtnl_held
;
3355 down_read(&block
->cb_lock
);
3356 /* Need to obtain rtnl lock if block is bound to devs that require it.
3357 * In block bind code cb_lock is obtained while holding rtnl, so we must
3358 * obtain the locks in same order here.
3360 if (!rtnl_held
&& !take_rtnl
&& block
->lockeddevcnt
) {
3361 up_read(&block
->cb_lock
);
3366 /* Make sure all netdevs sharing this block are offload-capable. */
3367 if (block
->nooffloaddevcnt
&& err_stop
) {
3368 ok_count
= -EOPNOTSUPP
;
3372 tc_cls_offload_cnt_reset(block
, tp
, old_in_hw_count
, old_flags
);
3373 if (tp
->ops
->hw_del
)
3374 tp
->ops
->hw_del(tp
, type_data
);
3376 ok_count
= __tc_setup_cb_call(block
, type
, type_data
, err_stop
);
3380 if (tp
->ops
->hw_add
)
3381 tp
->ops
->hw_add(tp
, type_data
);
3383 tc_cls_offload_cnt_update(block
, tp
, new_in_hw_count
,
3384 new_flags
, ok_count
, true);
3386 up_read(&block
->cb_lock
);
3389 return ok_count
< 0 ? ok_count
: 0;
3391 EXPORT_SYMBOL(tc_setup_cb_replace
);
3393 /* Destroy filter and decrement block offload counter, if filter was previously
3397 int tc_setup_cb_destroy(struct tcf_block
*block
, struct tcf_proto
*tp
,
3398 enum tc_setup_type type
, void *type_data
, bool err_stop
,
3399 u32
*flags
, unsigned int *in_hw_count
, bool rtnl_held
)
3401 bool take_rtnl
= READ_ONCE(block
->lockeddevcnt
) && !rtnl_held
;
3407 down_read(&block
->cb_lock
);
3408 /* Need to obtain rtnl lock if block is bound to devs that require it.
3409 * In block bind code cb_lock is obtained while holding rtnl, so we must
3410 * obtain the locks in same order here.
3412 if (!rtnl_held
&& !take_rtnl
&& block
->lockeddevcnt
) {
3413 up_read(&block
->cb_lock
);
3418 ok_count
= __tc_setup_cb_call(block
, type
, type_data
, err_stop
);
3420 tc_cls_offload_cnt_reset(block
, tp
, in_hw_count
, flags
);
3421 if (tp
->ops
->hw_del
)
3422 tp
->ops
->hw_del(tp
, type_data
);
3424 up_read(&block
->cb_lock
);
3427 return ok_count
< 0 ? ok_count
: 0;
3429 EXPORT_SYMBOL(tc_setup_cb_destroy
);
3431 int tc_setup_cb_reoffload(struct tcf_block
*block
, struct tcf_proto
*tp
,
3432 bool add
, flow_setup_cb_t
*cb
,
3433 enum tc_setup_type type
, void *type_data
,
3434 void *cb_priv
, u32
*flags
, unsigned int *in_hw_count
)
3436 int err
= cb(type
, type_data
, cb_priv
);
3439 if (add
&& tc_skip_sw(*flags
))
3442 tc_cls_offload_cnt_update(block
, tp
, in_hw_count
, flags
, 1,
3448 EXPORT_SYMBOL(tc_setup_cb_reoffload
);
3450 static int tcf_act_get_cookie(struct flow_action_entry
*entry
,
3451 const struct tc_action
*act
)
3453 struct tc_cookie
*cookie
;
3457 cookie
= rcu_dereference(act
->act_cookie
);
3459 entry
->cookie
= flow_action_cookie_create(cookie
->data
,
3469 static void tcf_act_put_cookie(struct flow_action_entry
*entry
)
3471 flow_action_cookie_destroy(entry
->cookie
);
3474 void tc_cleanup_flow_action(struct flow_action
*flow_action
)
3476 struct flow_action_entry
*entry
;
3479 flow_action_for_each(i
, entry
, flow_action
) {
3480 tcf_act_put_cookie(entry
);
3481 if (entry
->destructor
)
3482 entry
->destructor(entry
->destructor_priv
);
3485 EXPORT_SYMBOL(tc_cleanup_flow_action
);
3487 static void tcf_mirred_get_dev(struct flow_action_entry
*entry
,
3488 const struct tc_action
*act
)
3490 #ifdef CONFIG_NET_CLS_ACT
3491 entry
->dev
= act
->ops
->get_dev(act
, &entry
->destructor
);
3494 entry
->destructor_priv
= entry
->dev
;
3498 static void tcf_tunnel_encap_put_tunnel(void *priv
)
3500 struct ip_tunnel_info
*tunnel
= priv
;
3505 static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry
*entry
,
3506 const struct tc_action
*act
)
3508 entry
->tunnel
= tcf_tunnel_info_copy(act
);
3511 entry
->destructor
= tcf_tunnel_encap_put_tunnel
;
3512 entry
->destructor_priv
= entry
->tunnel
;
3516 static void tcf_sample_get_group(struct flow_action_entry
*entry
,
3517 const struct tc_action
*act
)
3519 #ifdef CONFIG_NET_CLS_ACT
3520 entry
->sample
.psample_group
=
3521 act
->ops
->get_psample_group(act
, &entry
->destructor
);
3522 entry
->destructor_priv
= entry
->sample
.psample_group
;
3526 static enum flow_action_hw_stats
tc_act_hw_stats(u8 hw_stats
)
3528 if (WARN_ON_ONCE(hw_stats
> TCA_ACT_HW_STATS_ANY
))
3529 return FLOW_ACTION_HW_STATS_DONT_CARE
;
3531 return FLOW_ACTION_HW_STATS_DISABLED
;
3536 int tc_setup_flow_action(struct flow_action
*flow_action
,
3537 const struct tcf_exts
*exts
)
3539 struct tc_action
*act
;
3540 int i
, j
, k
, err
= 0;
3542 BUILD_BUG_ON(TCA_ACT_HW_STATS_ANY
!= FLOW_ACTION_HW_STATS_ANY
);
3543 BUILD_BUG_ON(TCA_ACT_HW_STATS_IMMEDIATE
!= FLOW_ACTION_HW_STATS_IMMEDIATE
);
3544 BUILD_BUG_ON(TCA_ACT_HW_STATS_DELAYED
!= FLOW_ACTION_HW_STATS_DELAYED
);
3550 tcf_exts_for_each_action(i
, act
, exts
) {
3551 struct flow_action_entry
*entry
;
3553 entry
= &flow_action
->entries
[j
];
3554 spin_lock_bh(&act
->tcfa_lock
);
3555 err
= tcf_act_get_cookie(entry
, act
);
3557 goto err_out_locked
;
3559 entry
->hw_stats
= tc_act_hw_stats(act
->hw_stats
);
3561 if (is_tcf_gact_ok(act
)) {
3562 entry
->id
= FLOW_ACTION_ACCEPT
;
3563 } else if (is_tcf_gact_shot(act
)) {
3564 entry
->id
= FLOW_ACTION_DROP
;
3565 } else if (is_tcf_gact_trap(act
)) {
3566 entry
->id
= FLOW_ACTION_TRAP
;
3567 } else if (is_tcf_gact_goto_chain(act
)) {
3568 entry
->id
= FLOW_ACTION_GOTO
;
3569 entry
->chain_index
= tcf_gact_goto_chain_index(act
);
3570 } else if (is_tcf_mirred_egress_redirect(act
)) {
3571 entry
->id
= FLOW_ACTION_REDIRECT
;
3572 tcf_mirred_get_dev(entry
, act
);
3573 } else if (is_tcf_mirred_egress_mirror(act
)) {
3574 entry
->id
= FLOW_ACTION_MIRRED
;
3575 tcf_mirred_get_dev(entry
, act
);
3576 } else if (is_tcf_mirred_ingress_redirect(act
)) {
3577 entry
->id
= FLOW_ACTION_REDIRECT_INGRESS
;
3578 tcf_mirred_get_dev(entry
, act
);
3579 } else if (is_tcf_mirred_ingress_mirror(act
)) {
3580 entry
->id
= FLOW_ACTION_MIRRED_INGRESS
;
3581 tcf_mirred_get_dev(entry
, act
);
3582 } else if (is_tcf_vlan(act
)) {
3583 switch (tcf_vlan_action(act
)) {
3584 case TCA_VLAN_ACT_PUSH
:
3585 entry
->id
= FLOW_ACTION_VLAN_PUSH
;
3586 entry
->vlan
.vid
= tcf_vlan_push_vid(act
);
3587 entry
->vlan
.proto
= tcf_vlan_push_proto(act
);
3588 entry
->vlan
.prio
= tcf_vlan_push_prio(act
);
3590 case TCA_VLAN_ACT_POP
:
3591 entry
->id
= FLOW_ACTION_VLAN_POP
;
3593 case TCA_VLAN_ACT_MODIFY
:
3594 entry
->id
= FLOW_ACTION_VLAN_MANGLE
;
3595 entry
->vlan
.vid
= tcf_vlan_push_vid(act
);
3596 entry
->vlan
.proto
= tcf_vlan_push_proto(act
);
3597 entry
->vlan
.prio
= tcf_vlan_push_prio(act
);
3601 goto err_out_locked
;
3603 } else if (is_tcf_tunnel_set(act
)) {
3604 entry
->id
= FLOW_ACTION_TUNNEL_ENCAP
;
3605 err
= tcf_tunnel_encap_get_tunnel(entry
, act
);
3607 goto err_out_locked
;
3608 } else if (is_tcf_tunnel_release(act
)) {
3609 entry
->id
= FLOW_ACTION_TUNNEL_DECAP
;
3610 } else if (is_tcf_pedit(act
)) {
3611 for (k
= 0; k
< tcf_pedit_nkeys(act
); k
++) {
3612 switch (tcf_pedit_cmd(act
, k
)) {
3613 case TCA_PEDIT_KEY_EX_CMD_SET
:
3614 entry
->id
= FLOW_ACTION_MANGLE
;
3616 case TCA_PEDIT_KEY_EX_CMD_ADD
:
3617 entry
->id
= FLOW_ACTION_ADD
;
3621 goto err_out_locked
;
3623 entry
->mangle
.htype
= tcf_pedit_htype(act
, k
);
3624 entry
->mangle
.mask
= tcf_pedit_mask(act
, k
);
3625 entry
->mangle
.val
= tcf_pedit_val(act
, k
);
3626 entry
->mangle
.offset
= tcf_pedit_offset(act
, k
);
3627 entry
->hw_stats
= tc_act_hw_stats(act
->hw_stats
);
3628 entry
= &flow_action
->entries
[++j
];
3630 } else if (is_tcf_csum(act
)) {
3631 entry
->id
= FLOW_ACTION_CSUM
;
3632 entry
->csum_flags
= tcf_csum_update_flags(act
);
3633 } else if (is_tcf_skbedit_mark(act
)) {
3634 entry
->id
= FLOW_ACTION_MARK
;
3635 entry
->mark
= tcf_skbedit_mark(act
);
3636 } else if (is_tcf_sample(act
)) {
3637 entry
->id
= FLOW_ACTION_SAMPLE
;
3638 entry
->sample
.trunc_size
= tcf_sample_trunc_size(act
);
3639 entry
->sample
.truncate
= tcf_sample_truncate(act
);
3640 entry
->sample
.rate
= tcf_sample_rate(act
);
3641 tcf_sample_get_group(entry
, act
);
3642 } else if (is_tcf_police(act
)) {
3643 entry
->id
= FLOW_ACTION_POLICE
;
3644 entry
->police
.burst
= tcf_police_tcfp_burst(act
);
3645 entry
->police
.rate_bytes_ps
=
3646 tcf_police_rate_bytes_ps(act
);
3647 } else if (is_tcf_ct(act
)) {
3648 entry
->id
= FLOW_ACTION_CT
;
3649 entry
->ct
.action
= tcf_ct_action(act
);
3650 entry
->ct
.zone
= tcf_ct_zone(act
);
3651 entry
->ct
.flow_table
= tcf_ct_ft(act
);
3652 } else if (is_tcf_mpls(act
)) {
3653 switch (tcf_mpls_action(act
)) {
3654 case TCA_MPLS_ACT_PUSH
:
3655 entry
->id
= FLOW_ACTION_MPLS_PUSH
;
3656 entry
->mpls_push
.proto
= tcf_mpls_proto(act
);
3657 entry
->mpls_push
.label
= tcf_mpls_label(act
);
3658 entry
->mpls_push
.tc
= tcf_mpls_tc(act
);
3659 entry
->mpls_push
.bos
= tcf_mpls_bos(act
);
3660 entry
->mpls_push
.ttl
= tcf_mpls_ttl(act
);
3662 case TCA_MPLS_ACT_POP
:
3663 entry
->id
= FLOW_ACTION_MPLS_POP
;
3664 entry
->mpls_pop
.proto
= tcf_mpls_proto(act
);
3666 case TCA_MPLS_ACT_MODIFY
:
3667 entry
->id
= FLOW_ACTION_MPLS_MANGLE
;
3668 entry
->mpls_mangle
.label
= tcf_mpls_label(act
);
3669 entry
->mpls_mangle
.tc
= tcf_mpls_tc(act
);
3670 entry
->mpls_mangle
.bos
= tcf_mpls_bos(act
);
3671 entry
->mpls_mangle
.ttl
= tcf_mpls_ttl(act
);
3674 goto err_out_locked
;
3676 } else if (is_tcf_skbedit_ptype(act
)) {
3677 entry
->id
= FLOW_ACTION_PTYPE
;
3678 entry
->ptype
= tcf_skbedit_ptype(act
);
3679 } else if (is_tcf_skbedit_priority(act
)) {
3680 entry
->id
= FLOW_ACTION_PRIORITY
;
3681 entry
->priority
= tcf_skbedit_priority(act
);
3684 goto err_out_locked
;
3686 spin_unlock_bh(&act
->tcfa_lock
);
3688 if (!is_tcf_pedit(act
))
3694 tc_cleanup_flow_action(flow_action
);
3698 spin_unlock_bh(&act
->tcfa_lock
);
3701 EXPORT_SYMBOL(tc_setup_flow_action
);
3703 unsigned int tcf_exts_num_actions(struct tcf_exts
*exts
)
3705 unsigned int num_acts
= 0;
3706 struct tc_action
*act
;
3709 tcf_exts_for_each_action(i
, act
, exts
) {
3710 if (is_tcf_pedit(act
))
3711 num_acts
+= tcf_pedit_nkeys(act
);
3717 EXPORT_SYMBOL(tcf_exts_num_actions
);
3719 static __net_init
int tcf_net_init(struct net
*net
)
3721 struct tcf_net
*tn
= net_generic(net
, tcf_net_id
);
3723 spin_lock_init(&tn
->idr_lock
);
3728 static void __net_exit
tcf_net_exit(struct net
*net
)
3730 struct tcf_net
*tn
= net_generic(net
, tcf_net_id
);
3732 idr_destroy(&tn
->idr
);
3735 static struct pernet_operations tcf_net_ops
= {
3736 .init
= tcf_net_init
,
3737 .exit
= tcf_net_exit
,
3739 .size
= sizeof(struct tcf_net
),
3742 static struct flow_indr_block_entry block_entry
= {
3743 .cb
= tc_indr_block_get_and_cmd
,
3744 .list
= LIST_HEAD_INIT(block_entry
.list
),
3747 static int __init
tc_filter_init(void)
3751 tc_filter_wq
= alloc_ordered_workqueue("tc_filter_workqueue", 0);
3755 err
= register_pernet_subsys(&tcf_net_ops
);
3757 goto err_register_pernet_subsys
;
3759 flow_indr_add_block_cb(&block_entry
);
3761 rtnl_register(PF_UNSPEC
, RTM_NEWTFILTER
, tc_new_tfilter
, NULL
,
3762 RTNL_FLAG_DOIT_UNLOCKED
);
3763 rtnl_register(PF_UNSPEC
, RTM_DELTFILTER
, tc_del_tfilter
, NULL
,
3764 RTNL_FLAG_DOIT_UNLOCKED
);
3765 rtnl_register(PF_UNSPEC
, RTM_GETTFILTER
, tc_get_tfilter
,
3766 tc_dump_tfilter
, RTNL_FLAG_DOIT_UNLOCKED
);
3767 rtnl_register(PF_UNSPEC
, RTM_NEWCHAIN
, tc_ctl_chain
, NULL
, 0);
3768 rtnl_register(PF_UNSPEC
, RTM_DELCHAIN
, tc_ctl_chain
, NULL
, 0);
3769 rtnl_register(PF_UNSPEC
, RTM_GETCHAIN
, tc_ctl_chain
,
3774 err_register_pernet_subsys
:
3775 destroy_workqueue(tc_filter_wq
);
3779 subsys_initcall(tc_filter_init
);