1 /*
2 * net/sched/cls_api.c Packet classifier API.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Changes:
12 *
13 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
14 *
15 */
16
17 #include <linux/module.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/string.h>
21 #include <linux/errno.h>
22 #include <linux/err.h>
23 #include <linux/skbuff.h>
24 #include <linux/init.h>
25 #include <linux/kmod.h>
26 #include <linux/slab.h>
27 #include <linux/idr.h>
28 #include <linux/rhashtable.h>
29 #include <net/net_namespace.h>
30 #include <net/sock.h>
31 #include <net/netlink.h>
32 #include <net/pkt_sched.h>
33 #include <net/pkt_cls.h>
34 #include <net/tc_act/tc_pedit.h>
35 #include <net/tc_act/tc_mirred.h>
36 #include <net/tc_act/tc_vlan.h>
37 #include <net/tc_act/tc_tunnel_key.h>
38 #include <net/tc_act/tc_csum.h>
39 #include <net/tc_act/tc_gact.h>
40 #include <net/tc_act/tc_skbedit.h>
41
42 extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
43
44 /* The list of all installed classifier types */
45 static LIST_HEAD(tcf_proto_base);
46
47 /* Protects list of registered TC modules. It is pure SMP lock. */
48 static DEFINE_RWLOCK(cls_mod_lock);
49
50 /* Find classifier type by string name */
51
52 static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
53 {
54 const struct tcf_proto_ops *t, *res = NULL;
55
56 if (kind) {
57 read_lock(&cls_mod_lock);
58 list_for_each_entry(t, &tcf_proto_base, head) {
59 if (strcmp(kind, t->kind) == 0) {
60 if (try_module_get(t->owner))
61 res = t;
62 break;
63 }
64 }
65 read_unlock(&cls_mod_lock);
66 }
67 return res;
68 }
69
70 static const struct tcf_proto_ops *
71 tcf_proto_lookup_ops(const char *kind, bool rtnl_held,
72 struct netlink_ext_ack *extack)
73 {
74 const struct tcf_proto_ops *ops;
75
76 ops = __tcf_proto_lookup_ops(kind);
77 if (ops)
78 return ops;
79 #ifdef CONFIG_MODULES
80 if (rtnl_held)
81 rtnl_unlock();
82 request_module("cls_%s", kind);
83 if (rtnl_held)
84 rtnl_lock();
85 ops = __tcf_proto_lookup_ops(kind);
86 /* We dropped the RTNL semaphore in order to perform
87 * the module load. So, even if we succeeded in loading
88 * the module we have to replay the request. We indicate
89 * this using -EAGAIN.
90 */
91 if (ops) {
92 module_put(ops->owner);
93 return ERR_PTR(-EAGAIN);
94 }
95 #endif
96 NL_SET_ERR_MSG(extack, "TC classifier not found");
97 return ERR_PTR(-ENOENT);
98 }
99
100 /* Register(unregister) new classifier type */
101
102 int register_tcf_proto_ops(struct tcf_proto_ops *ops)
103 {
104 struct tcf_proto_ops *t;
105 int rc = -EEXIST;
106
107 write_lock(&cls_mod_lock);
108 list_for_each_entry(t, &tcf_proto_base, head)
109 if (!strcmp(ops->kind, t->kind))
110 goto out;
111
112 list_add_tail(&ops->head, &tcf_proto_base);
113 rc = 0;
114 out:
115 write_unlock(&cls_mod_lock);
116 return rc;
117 }
118 EXPORT_SYMBOL(register_tcf_proto_ops);
119
120 static struct workqueue_struct *tc_filter_wq;
121
122 int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
123 {
124 struct tcf_proto_ops *t;
125 int rc = -ENOENT;
126
127 /* Wait for outstanding call_rcu()s, if any, from a
128 * tcf_proto_ops's destroy() handler.
129 */
130 rcu_barrier();
131 flush_workqueue(tc_filter_wq);
132
133 write_lock(&cls_mod_lock);
134 list_for_each_entry(t, &tcf_proto_base, head) {
135 if (t == ops) {
136 list_del(&t->head);
137 rc = 0;
138 break;
139 }
140 }
141 write_unlock(&cls_mod_lock);
142 return rc;
143 }
144 EXPORT_SYMBOL(unregister_tcf_proto_ops);
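/* Usage sketch (hypothetical classifier "foo", patterned after in-tree
 * classifiers such as cls_basic): a classifier module fills in a
 * struct tcf_proto_ops and registers/unregisters it from its module
 * init/exit handlers.
 *
 *	static struct tcf_proto_ops cls_foo_ops __read_mostly = {
 *		.kind		= "foo",
 *		.classify	= cls_foo_classify,
 *		.init		= cls_foo_init,
 *		.destroy	= cls_foo_destroy,
 *		.get		= cls_foo_get,
 *		.change		= cls_foo_change,
 *		.delete		= cls_foo_delete,
 *		.walk		= cls_foo_walk,
 *		.dump		= cls_foo_dump,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	static int __init cls_foo_init_mod(void)
 *	{
 *		return register_tcf_proto_ops(&cls_foo_ops);
 *	}
 *
 *	static void __exit cls_foo_exit_mod(void)
 *	{
 *		unregister_tcf_proto_ops(&cls_foo_ops);
 *	}
 *	module_init(cls_foo_init_mod);
 *	module_exit(cls_foo_exit_mod);
 */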
145
146 bool tcf_queue_work(struct rcu_work *rwork, work_func_t func)
147 {
148 INIT_RCU_WORK(rwork, func);
149 return queue_rcu_work(tc_filter_wq, rwork);
150 }
151 EXPORT_SYMBOL(tcf_queue_work);
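/* Minimal sketch of how classifiers use tcf_queue_work() (assuming a
 * hypothetical filter struct with an embedded rcu_work, as the in-tree
 * classifiers have): the actual freeing runs on tc_filter_wq after an
 * RCU grace period.
 *
 *	struct cls_foo_filter {
 *		...
 *		struct rcu_work rwork;
 *	};
 *
 *	static void cls_foo_delete_filter_work(struct work_struct *work)
 *	{
 *		struct cls_foo_filter *f = container_of(to_rcu_work(work),
 *							struct cls_foo_filter,
 *							rwork);
 *		... release actions, extensions, etc. ...
 *		kfree(f);
 *	}
 *
 *	... on delete/destroy:
 *	tcf_queue_work(&f->rwork, cls_foo_delete_filter_work);
 */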
152
153 /* Select new prio value from the range, managed by kernel. */
154
155 static inline u32 tcf_auto_prio(struct tcf_proto *tp)
156 {
157 u32 first = TC_H_MAKE(0xC0000000U, 0U);
158
159 if (tp)
160 first = tp->prio - 1;
161
162 return TC_H_MAJ(first);
163 }
164
165 static bool tcf_proto_is_unlocked(const char *kind)
166 {
167 const struct tcf_proto_ops *ops;
168 bool ret;
169
170 ops = tcf_proto_lookup_ops(kind, false, NULL);
171 /* On error return false to take rtnl lock. Proto lookup/create
172 * functions will perform lookup again and properly handle errors.
173 */
174 if (IS_ERR(ops))
175 return false;
176
177 ret = !!(ops->flags & TCF_PROTO_OPS_DOIT_UNLOCKED);
178 module_put(ops->owner);
179 return ret;
180 }
181
182 static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
183 u32 prio, struct tcf_chain *chain,
184 bool rtnl_held,
185 struct netlink_ext_ack *extack)
186 {
187 struct tcf_proto *tp;
188 int err;
189
190 tp = kzalloc(sizeof(*tp), GFP_KERNEL);
191 if (!tp)
192 return ERR_PTR(-ENOBUFS);
193
194 tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack);
195 if (IS_ERR(tp->ops)) {
196 err = PTR_ERR(tp->ops);
197 goto errout;
198 }
199 tp->classify = tp->ops->classify;
200 tp->protocol = protocol;
201 tp->prio = prio;
202 tp->chain = chain;
203 spin_lock_init(&tp->lock);
204 refcount_set(&tp->refcnt, 1);
205
206 err = tp->ops->init(tp);
207 if (err) {
208 module_put(tp->ops->owner);
209 goto errout;
210 }
211 return tp;
212
213 errout:
214 kfree(tp);
215 return ERR_PTR(err);
216 }
217
218 static void tcf_proto_get(struct tcf_proto *tp)
219 {
220 refcount_inc(&tp->refcnt);
221 }
222
223 static void tcf_chain_put(struct tcf_chain *chain);
224
225 static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
226 struct netlink_ext_ack *extack)
227 {
228 tp->ops->destroy(tp, rtnl_held, extack);
229 tcf_chain_put(tp->chain);
230 module_put(tp->ops->owner);
231 kfree_rcu(tp, rcu);
232 }
233
234 static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
235 struct netlink_ext_ack *extack)
236 {
237 if (refcount_dec_and_test(&tp->refcnt))
238 tcf_proto_destroy(tp, rtnl_held, extack);
239 }
240
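/* Walker callback used by tcf_proto_is_empty() below: being called with any
 * non-NULL filter handle marks the proto as non-empty and stops the walk by
 * returning -1.
 */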
241 static int walker_check_empty(struct tcf_proto *tp, void *fh,
242 struct tcf_walker *arg)
243 {
244 if (fh) {
245 arg->nonempty = true;
246 return -1;
247 }
248 return 0;
249 }
250
251 static bool tcf_proto_is_empty(struct tcf_proto *tp, bool rtnl_held)
252 {
253 struct tcf_walker walker = { .fn = walker_check_empty, };
254
255 if (tp->ops->walk) {
256 tp->ops->walk(tp, &walker, rtnl_held);
257 return !walker.nonempty;
258 }
259 return true;
260 }
261
262 static bool tcf_proto_check_delete(struct tcf_proto *tp, bool rtnl_held)
263 {
264 spin_lock(&tp->lock);
265 if (tcf_proto_is_empty(tp, rtnl_held))
266 tp->deleting = true;
267 spin_unlock(&tp->lock);
268 return tp->deleting;
269 }
270
271 static void tcf_proto_mark_delete(struct tcf_proto *tp)
272 {
273 spin_lock(&tp->lock);
274 tp->deleting = true;
275 spin_unlock(&tp->lock);
276 }
277
278 static bool tcf_proto_is_deleting(struct tcf_proto *tp)
279 {
280 bool deleting;
281
282 spin_lock(&tp->lock);
283 deleting = tp->deleting;
284 spin_unlock(&tp->lock);
285
286 return deleting;
287 }
288
289 #define ASSERT_BLOCK_LOCKED(block) \
290 lockdep_assert_held(&(block)->lock)
291
292 struct tcf_filter_chain_list_item {
293 struct list_head list;
294 tcf_chain_head_change_t *chain_head_change;
295 void *chain_head_change_priv;
296 };
297
298 static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
299 u32 chain_index)
300 {
301 struct tcf_chain *chain;
302
303 ASSERT_BLOCK_LOCKED(block);
304
305 chain = kzalloc(sizeof(*chain), GFP_KERNEL);
306 if (!chain)
307 return NULL;
308 list_add_tail(&chain->list, &block->chain_list);
309 mutex_init(&chain->filter_chain_lock);
310 chain->block = block;
311 chain->index = chain_index;
312 chain->refcnt = 1;
313 if (!chain->index)
314 block->chain0.chain = chain;
315 return chain;
316 }
317
318 static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item *item,
319 struct tcf_proto *tp_head)
320 {
321 if (item->chain_head_change)
322 item->chain_head_change(tp_head, item->chain_head_change_priv);
323 }
324
325 static void tcf_chain0_head_change(struct tcf_chain *chain,
326 struct tcf_proto *tp_head)
327 {
328 struct tcf_filter_chain_list_item *item;
329 struct tcf_block *block = chain->block;
330
331 if (chain->index)
332 return;
333
334 mutex_lock(&block->lock);
335 list_for_each_entry(item, &block->chain0.filter_chain_list, list)
336 tcf_chain_head_change_item(item, tp_head);
337 mutex_unlock(&block->lock);
338 }
339
340 /* Returns true if block can be safely freed. */
341
342 static bool tcf_chain_detach(struct tcf_chain *chain)
343 {
344 struct tcf_block *block = chain->block;
345
346 ASSERT_BLOCK_LOCKED(block);
347
348 list_del(&chain->list);
349 if (!chain->index)
350 block->chain0.chain = NULL;
351
352 if (list_empty(&block->chain_list) &&
353 refcount_read(&block->refcnt) == 0)
354 return true;
355
356 return false;
357 }
358
359 static void tcf_block_destroy(struct tcf_block *block)
360 {
361 mutex_destroy(&block->lock);
362 kfree_rcu(block, rcu);
363 }
364
365 static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block)
366 {
367 struct tcf_block *block = chain->block;
368
369 mutex_destroy(&chain->filter_chain_lock);
370 kfree(chain);
371 if (free_block)
372 tcf_block_destroy(block);
373 }
374
375 static void tcf_chain_hold(struct tcf_chain *chain)
376 {
377 ASSERT_BLOCK_LOCKED(chain->block);
378
379 ++chain->refcnt;
380 }
381
382 static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain)
383 {
384 ASSERT_BLOCK_LOCKED(chain->block);
385
386 /* In case all the references are action references, this
387 * chain should not be shown to the user.
388 */
389 return chain->refcnt == chain->action_refcnt;
390 }
391
392 static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
393 u32 chain_index)
394 {
395 struct tcf_chain *chain;
396
397 ASSERT_BLOCK_LOCKED(block);
398
399 list_for_each_entry(chain, &block->chain_list, list) {
400 if (chain->index == chain_index)
401 return chain;
402 }
403 return NULL;
404 }
405
406 static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
407 u32 seq, u16 flags, int event, bool unicast);
408
409 static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
410 u32 chain_index, bool create,
411 bool by_act)
412 {
413 struct tcf_chain *chain = NULL;
414 bool is_first_reference;
415
416 mutex_lock(&block->lock);
417 chain = tcf_chain_lookup(block, chain_index);
418 if (chain) {
419 tcf_chain_hold(chain);
420 } else {
421 if (!create)
422 goto errout;
423 chain = tcf_chain_create(block, chain_index);
424 if (!chain)
425 goto errout;
426 }
427
428 if (by_act)
429 ++chain->action_refcnt;
430 is_first_reference = chain->refcnt - chain->action_refcnt == 1;
431 mutex_unlock(&block->lock);
432
433 /* Send notification only in case we got the first
434 * non-action reference. Until then, the chain acts only as
435 * a placeholder for actions pointing to it and user ought
436 * not know about them.
437 */
438 if (is_first_reference && !by_act)
439 tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
440 RTM_NEWCHAIN, false);
441
442 return chain;
443
444 errout:
445 mutex_unlock(&block->lock);
446 return chain;
447 }
448
449 static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
450 bool create)
451 {
452 return __tcf_chain_get(block, chain_index, create, false);
453 }
454
455 struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index)
456 {
457 return __tcf_chain_get(block, chain_index, true, true);
458 }
459 EXPORT_SYMBOL(tcf_chain_get_by_act);
460
461 static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
462 void *tmplt_priv);
463 static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
464 void *tmplt_priv, u32 chain_index,
465 struct tcf_block *block, struct sk_buff *oskb,
466 u32 seq, u16 flags, bool unicast);
467
468 static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
469 bool explicitly_created)
470 {
471 struct tcf_block *block = chain->block;
472 const struct tcf_proto_ops *tmplt_ops;
473 bool is_last, free_block = false;
474 unsigned int refcnt;
475 void *tmplt_priv;
476 u32 chain_index;
477
478 mutex_lock(&block->lock);
479 if (explicitly_created) {
480 if (!chain->explicitly_created) {
481 mutex_unlock(&block->lock);
482 return;
483 }
484 chain->explicitly_created = false;
485 }
486
487 if (by_act)
488 chain->action_refcnt--;
489
490 /* tc_chain_notify_delete can't be called while holding block lock.
491 * However, when block is unlocked chain can be changed concurrently, so
492 * save these to temporary variables.
493 */
494 refcnt = --chain->refcnt;
495 is_last = refcnt - chain->action_refcnt == 0;
496 tmplt_ops = chain->tmplt_ops;
497 tmplt_priv = chain->tmplt_priv;
498 chain_index = chain->index;
499
500 if (refcnt == 0)
501 free_block = tcf_chain_detach(chain);
502 mutex_unlock(&block->lock);
503
504 /* The last dropped non-action reference will trigger notification. */
505 if (is_last && !by_act) {
506 tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain_index,
507 block, NULL, 0, 0, false);
508 /* Last reference to chain, no need to lock. */
509 chain->flushing = false;
510 }
511
512 if (refcnt == 0) {
513 tc_chain_tmplt_del(tmplt_ops, tmplt_priv);
514 tcf_chain_destroy(chain, free_block);
515 }
516 }
517
518 static void tcf_chain_put(struct tcf_chain *chain)
519 {
520 __tcf_chain_put(chain, false, false);
521 }
522
523 void tcf_chain_put_by_act(struct tcf_chain *chain)
524 {
525 __tcf_chain_put(chain, true, false);
526 }
527 EXPORT_SYMBOL(tcf_chain_put_by_act);
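/* Rough sketch of the action-side usage (the real code lives in act_api.c):
 * an action with a "goto chain" control takes a long-lived chain reference
 * so its jump target cannot disappear, and drops it when the action is
 * destroyed.
 *
 *	chain = tcf_chain_get_by_act(block, chain_index);
 *	if (!chain)
 *		return -ENOMEM;
 *	...
 *	tcf_chain_put_by_act(chain);
 */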
528
529 static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
530 {
531 __tcf_chain_put(chain, false, true);
532 }
533
534 static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
535 {
536 struct tcf_proto *tp, *tp_next;
537
538 mutex_lock(&chain->filter_chain_lock);
539 tp = tcf_chain_dereference(chain->filter_chain, chain);
540 RCU_INIT_POINTER(chain->filter_chain, NULL);
541 tcf_chain0_head_change(chain, NULL);
542 chain->flushing = true;
543 mutex_unlock(&chain->filter_chain_lock);
544
545 while (tp) {
546 tp_next = rcu_dereference_protected(tp->next, 1);
547 tcf_proto_put(tp, rtnl_held, NULL);
548 tp = tp_next;
549 }
550 }
551
552 static struct tcf_block *tc_dev_ingress_block(struct net_device *dev)
553 {
554 const struct Qdisc_class_ops *cops;
555 struct Qdisc *qdisc;
556
557 if (!dev_ingress_queue(dev))
558 return NULL;
559
560 qdisc = dev_ingress_queue(dev)->qdisc_sleeping;
561 if (!qdisc)
562 return NULL;
563
564 cops = qdisc->ops->cl_ops;
565 if (!cops)
566 return NULL;
567
568 if (!cops->tcf_block)
569 return NULL;
570
571 return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
572 }
573
574 static struct rhashtable indr_setup_block_ht;
575
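/* Per-device state for indirect block offload: drivers that want to offload
 * filters attached to some other device (typically a tunnel netdev they do
 * not own) register callbacks keyed by that net_device in the hashtable
 * above.
 */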
576 struct tc_indr_block_dev {
577 struct rhash_head ht_node;
578 struct net_device *dev;
579 unsigned int refcnt;
580 struct list_head cb_list;
581 struct tcf_block *block;
582 };
583
584 struct tc_indr_block_cb {
585 struct list_head list;
586 void *cb_priv;
587 tc_indr_block_bind_cb_t *cb;
588 void *cb_ident;
589 };
590
591 static const struct rhashtable_params tc_indr_setup_block_ht_params = {
592 .key_offset = offsetof(struct tc_indr_block_dev, dev),
593 .head_offset = offsetof(struct tc_indr_block_dev, ht_node),
594 .key_len = sizeof(struct net_device *),
595 };
596
597 static struct tc_indr_block_dev *
598 tc_indr_block_dev_lookup(struct net_device *dev)
599 {
600 return rhashtable_lookup_fast(&indr_setup_block_ht, &dev,
601 tc_indr_setup_block_ht_params);
602 }
603
604 static struct tc_indr_block_dev *tc_indr_block_dev_get(struct net_device *dev)
605 {
606 struct tc_indr_block_dev *indr_dev;
607
608 indr_dev = tc_indr_block_dev_lookup(dev);
609 if (indr_dev)
610 goto inc_ref;
611
612 indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL);
613 if (!indr_dev)
614 return NULL;
615
616 INIT_LIST_HEAD(&indr_dev->cb_list);
617 indr_dev->dev = dev;
618 indr_dev->block = tc_dev_ingress_block(dev);
619 if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node,
620 tc_indr_setup_block_ht_params)) {
621 kfree(indr_dev);
622 return NULL;
623 }
624
625 inc_ref:
626 indr_dev->refcnt++;
627 return indr_dev;
628 }
629
630 static void tc_indr_block_dev_put(struct tc_indr_block_dev *indr_dev)
631 {
632 if (--indr_dev->refcnt)
633 return;
634
635 rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node,
636 tc_indr_setup_block_ht_params);
637 kfree(indr_dev);
638 }
639
640 static struct tc_indr_block_cb *
641 tc_indr_block_cb_lookup(struct tc_indr_block_dev *indr_dev,
642 tc_indr_block_bind_cb_t *cb, void *cb_ident)
643 {
644 struct tc_indr_block_cb *indr_block_cb;
645
646 list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
647 if (indr_block_cb->cb == cb &&
648 indr_block_cb->cb_ident == cb_ident)
649 return indr_block_cb;
650 return NULL;
651 }
652
653 static struct tc_indr_block_cb *
654 tc_indr_block_cb_add(struct tc_indr_block_dev *indr_dev, void *cb_priv,
655 tc_indr_block_bind_cb_t *cb, void *cb_ident)
656 {
657 struct tc_indr_block_cb *indr_block_cb;
658
659 indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
660 if (indr_block_cb)
661 return ERR_PTR(-EEXIST);
662
663 indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL);
664 if (!indr_block_cb)
665 return ERR_PTR(-ENOMEM);
666
667 indr_block_cb->cb_priv = cb_priv;
668 indr_block_cb->cb = cb;
669 indr_block_cb->cb_ident = cb_ident;
670 list_add(&indr_block_cb->list, &indr_dev->cb_list);
671
672 return indr_block_cb;
673 }
674
675 static void tc_indr_block_cb_del(struct tc_indr_block_cb *indr_block_cb)
676 {
677 list_del(&indr_block_cb->list);
678 kfree(indr_block_cb);
679 }
680
681 static void tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev,
682 struct tc_indr_block_cb *indr_block_cb,
683 enum tc_block_command command)
684 {
685 struct tc_block_offload bo = {
686 .command = command,
687 .binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS,
688 .block = indr_dev->block,
689 };
690
691 if (!indr_dev->block)
692 return;
693
694 indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
695 &bo);
696 }
697
698 int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
699 tc_indr_block_bind_cb_t *cb, void *cb_ident)
700 {
701 struct tc_indr_block_cb *indr_block_cb;
702 struct tc_indr_block_dev *indr_dev;
703 int err;
704
705 indr_dev = tc_indr_block_dev_get(dev);
706 if (!indr_dev)
707 return -ENOMEM;
708
709 indr_block_cb = tc_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident);
710 err = PTR_ERR_OR_ZERO(indr_block_cb);
711 if (err)
712 goto err_dev_put;
713
714 tc_indr_block_ing_cmd(indr_dev, indr_block_cb, TC_BLOCK_BIND);
715 return 0;
716
717 err_dev_put:
718 tc_indr_block_dev_put(indr_dev);
719 return err;
720 }
721 EXPORT_SYMBOL_GPL(__tc_indr_block_cb_register);
722
723 int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
724 tc_indr_block_bind_cb_t *cb, void *cb_ident)
725 {
726 int err;
727
728 rtnl_lock();
729 err = __tc_indr_block_cb_register(dev, cb_priv, cb, cb_ident);
730 rtnl_unlock();
731
732 return err;
733 }
734 EXPORT_SYMBOL_GPL(tc_indr_block_cb_register);
735
736 void __tc_indr_block_cb_unregister(struct net_device *dev,
737 tc_indr_block_bind_cb_t *cb, void *cb_ident)
738 {
739 struct tc_indr_block_cb *indr_block_cb;
740 struct tc_indr_block_dev *indr_dev;
741
742 indr_dev = tc_indr_block_dev_lookup(dev);
743 if (!indr_dev)
744 return;
745
746 indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
747 if (!indr_block_cb)
748 return;
749
750 /* Send unbind message if required to free any block cbs. */
751 tc_indr_block_ing_cmd(indr_dev, indr_block_cb, TC_BLOCK_UNBIND);
752 tc_indr_block_cb_del(indr_block_cb);
753 tc_indr_block_dev_put(indr_dev);
754 }
755 EXPORT_SYMBOL_GPL(__tc_indr_block_cb_unregister);
756
757 void tc_indr_block_cb_unregister(struct net_device *dev,
758 tc_indr_block_bind_cb_t *cb, void *cb_ident)
759 {
760 rtnl_lock();
761 __tc_indr_block_cb_unregister(dev, cb, cb_ident);
762 rtnl_unlock();
763 }
764 EXPORT_SYMBOL_GPL(tc_indr_block_cb_unregister);
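/* Driver-side sketch (hypothetical foo_* names): a driver registers an
 * indirect block callback when it learns about a foreign device it can
 * offload for, and unregisters it when that device goes away.
 *
 *	err = tc_indr_block_cb_register(netdev, priv,
 *					foo_indr_setup_tc_cb, priv);
 *	...
 *	tc_indr_block_cb_unregister(netdev, foo_indr_setup_tc_cb, priv);
 *
 * The callback has the tc_indr_block_bind_cb_t signature and handles
 * TC_SETUP_BLOCK much like an ndo_setup_tc() implementation would,
 * registering or unregistering per-block callbacks depending on
 * bo->command (TC_BLOCK_BIND / TC_BLOCK_UNBIND).
 */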
765
766 static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
767 struct tcf_block_ext_info *ei,
768 enum tc_block_command command,
769 struct netlink_ext_ack *extack)
770 {
771 struct tc_indr_block_cb *indr_block_cb;
772 struct tc_indr_block_dev *indr_dev;
773 struct tc_block_offload bo = {
774 .command = command,
775 .binder_type = ei->binder_type,
776 .block = block,
777 .extack = extack,
778 };
779
780 indr_dev = tc_indr_block_dev_lookup(dev);
781 if (!indr_dev)
782 return;
783
784 indr_dev->block = command == TC_BLOCK_BIND ? block : NULL;
785
786 list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
787 indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
788 &bo);
789 }
790
791 static bool tcf_block_offload_in_use(struct tcf_block *block)
792 {
793 return block->offloadcnt;
794 }
795
796 static int tcf_block_offload_cmd(struct tcf_block *block,
797 struct net_device *dev,
798 struct tcf_block_ext_info *ei,
799 enum tc_block_command command,
800 struct netlink_ext_ack *extack)
801 {
802 struct tc_block_offload bo = {};
803
804 bo.command = command;
805 bo.binder_type = ei->binder_type;
806 bo.block = block;
807 bo.extack = extack;
808 return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
809 }
810
811 static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
812 struct tcf_block_ext_info *ei,
813 struct netlink_ext_ack *extack)
814 {
815 struct net_device *dev = q->dev_queue->dev;
816 int err;
817
818 if (!dev->netdev_ops->ndo_setup_tc)
819 goto no_offload_dev_inc;
820
821 /* If tc offload feature is disabled and the block we try to bind
822 * to already has some offloaded filters, refuse to bind.
823 */
824 if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) {
825 NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
826 return -EOPNOTSUPP;
827 }
828
829 err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND, extack);
830 if (err == -EOPNOTSUPP)
831 goto no_offload_dev_inc;
832 if (err)
833 return err;
834
835 tc_indr_block_call(block, dev, ei, TC_BLOCK_BIND, extack);
836 return 0;
837
838 no_offload_dev_inc:
839 if (tcf_block_offload_in_use(block))
840 return -EOPNOTSUPP;
841 block->nooffloaddevcnt++;
842 tc_indr_block_call(block, dev, ei, TC_BLOCK_BIND, extack);
843 return 0;
844 }
845
846 static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
847 struct tcf_block_ext_info *ei)
848 {
849 struct net_device *dev = q->dev_queue->dev;
850 int err;
851
852 tc_indr_block_call(block, dev, ei, TC_BLOCK_UNBIND, NULL);
853
854 if (!dev->netdev_ops->ndo_setup_tc)
855 goto no_offload_dev_dec;
856 err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND, NULL);
857 if (err == -EOPNOTSUPP)
858 goto no_offload_dev_dec;
859 return;
860
861 no_offload_dev_dec:
862 WARN_ON(block->nooffloaddevcnt-- == 0);
863 }
864
865 static int
866 tcf_chain0_head_change_cb_add(struct tcf_block *block,
867 struct tcf_block_ext_info *ei,
868 struct netlink_ext_ack *extack)
869 {
870 struct tcf_filter_chain_list_item *item;
871 struct tcf_chain *chain0;
872
873 item = kmalloc(sizeof(*item), GFP_KERNEL);
874 if (!item) {
875 NL_SET_ERR_MSG(extack, "Memory allocation for head change callback item failed");
876 return -ENOMEM;
877 }
878 item->chain_head_change = ei->chain_head_change;
879 item->chain_head_change_priv = ei->chain_head_change_priv;
880
881 mutex_lock(&block->lock);
882 chain0 = block->chain0.chain;
883 if (chain0)
884 tcf_chain_hold(chain0);
885 else
886 list_add(&item->list, &block->chain0.filter_chain_list);
887 mutex_unlock(&block->lock);
888
889 if (chain0) {
890 struct tcf_proto *tp_head;
891
892 mutex_lock(&chain0->filter_chain_lock);
893
894 tp_head = tcf_chain_dereference(chain0->filter_chain, chain0);
895 if (tp_head)
896 tcf_chain_head_change_item(item, tp_head);
897
898 mutex_lock(&block->lock);
899 list_add(&item->list, &block->chain0.filter_chain_list);
900 mutex_unlock(&block->lock);
901
902 mutex_unlock(&chain0->filter_chain_lock);
903 tcf_chain_put(chain0);
904 }
905
906 return 0;
907 }
908
909 static void
910 tcf_chain0_head_change_cb_del(struct tcf_block *block,
911 struct tcf_block_ext_info *ei)
912 {
913 struct tcf_filter_chain_list_item *item;
914
915 mutex_lock(&block->lock);
916 list_for_each_entry(item, &block->chain0.filter_chain_list, list) {
917 if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
918 (item->chain_head_change == ei->chain_head_change &&
919 item->chain_head_change_priv == ei->chain_head_change_priv)) {
920 if (block->chain0.chain)
921 tcf_chain_head_change_item(item, NULL);
922 list_del(&item->list);
923 mutex_unlock(&block->lock);
924
925 kfree(item);
926 return;
927 }
928 }
929 mutex_unlock(&block->lock);
930 WARN_ON(1);
931 }
932
933 struct tcf_net {
934 spinlock_t idr_lock; /* Protects idr */
935 struct idr idr;
936 };
937
938 static unsigned int tcf_net_id;
939
940 static int tcf_block_insert(struct tcf_block *block, struct net *net,
941 struct netlink_ext_ack *extack)
942 {
943 struct tcf_net *tn = net_generic(net, tcf_net_id);
944 int err;
945
946 idr_preload(GFP_KERNEL);
947 spin_lock(&tn->idr_lock);
948 err = idr_alloc_u32(&tn->idr, block, &block->index, block->index,
949 GFP_NOWAIT);
950 spin_unlock(&tn->idr_lock);
951 idr_preload_end();
952
953 return err;
954 }
955
956 static void tcf_block_remove(struct tcf_block *block, struct net *net)
957 {
958 struct tcf_net *tn = net_generic(net, tcf_net_id);
959
960 spin_lock(&tn->idr_lock);
961 idr_remove(&tn->idr, block->index);
962 spin_unlock(&tn->idr_lock);
963 }
964
965 static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
966 u32 block_index,
967 struct netlink_ext_ack *extack)
968 {
969 struct tcf_block *block;
970
971 block = kzalloc(sizeof(*block), GFP_KERNEL);
972 if (!block) {
973 NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
974 return ERR_PTR(-ENOMEM);
975 }
976 mutex_init(&block->lock);
977 INIT_LIST_HEAD(&block->chain_list);
978 INIT_LIST_HEAD(&block->cb_list);
979 INIT_LIST_HEAD(&block->owner_list);
980 INIT_LIST_HEAD(&block->chain0.filter_chain_list);
981
982 refcount_set(&block->refcnt, 1);
983 block->net = net;
984 block->index = block_index;
985
986 /* Don't store q pointer for blocks which are shared */
987 if (!tcf_block_shared(block))
988 block->q = q;
989 return block;
990 }
991
992 static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
993 {
994 struct tcf_net *tn = net_generic(net, tcf_net_id);
995
996 return idr_find(&tn->idr, block_index);
997 }
998
999 static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
1000 {
1001 struct tcf_block *block;
1002
1003 rcu_read_lock();
1004 block = tcf_block_lookup(net, block_index);
1005 if (block && !refcount_inc_not_zero(&block->refcnt))
1006 block = NULL;
1007 rcu_read_unlock();
1008
1009 return block;
1010 }
1011
1012 static struct tcf_chain *
1013 __tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
1014 {
1015 mutex_lock(&block->lock);
1016 if (chain)
1017 chain = list_is_last(&chain->list, &block->chain_list) ?
1018 NULL : list_next_entry(chain, list);
1019 else
1020 chain = list_first_entry_or_null(&block->chain_list,
1021 struct tcf_chain, list);
1022
1023 /* skip all action-only chains */
1024 while (chain && tcf_chain_held_by_acts_only(chain))
1025 chain = list_is_last(&chain->list, &block->chain_list) ?
1026 NULL : list_next_entry(chain, list);
1027
1028 if (chain)
1029 tcf_chain_hold(chain);
1030 mutex_unlock(&block->lock);
1031
1032 return chain;
1033 }
1034
1035 /* Function to be used by all clients that want to iterate over all chains on
1036 * block. It properly obtains block->lock and takes reference to chain before
1037 * returning it. Users of this function must be tolerant to concurrent chain
1038 * insertion/deletion or ensure that no concurrent chain modification is
1039 * possible. Note that netlink dump callbacks cannot guarantee a consistent
1040 * dump because the rtnl lock is released each time the skb is filled with
1041 * data and sent to user-space.
1042 */
1043
1044 struct tcf_chain *
1045 tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
1046 {
1047 struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain);
1048
1049 if (chain)
1050 tcf_chain_put(chain);
1051
1052 return chain_next;
1053 }
1054 EXPORT_SYMBOL(tcf_get_next_chain);
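/* Typical iteration pattern (sketch): tcf_get_next_chain() releases the
 * reference on the chain it is passed, so a full walk needs no explicit
 * puts; only an early break must be followed by tcf_chain_put().
 *
 *	struct tcf_chain *chain;
 *
 *	for (chain = tcf_get_next_chain(block, NULL);
 *	     chain;
 *	     chain = tcf_get_next_chain(block, chain)) {
 *		... use chain ...
 *	}
 */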
1055
1056 static struct tcf_proto *
1057 __tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
1058 {
1059 u32 prio = 0;
1060
1061 ASSERT_RTNL();
1062 mutex_lock(&chain->filter_chain_lock);
1063
1064 if (!tp) {
1065 tp = tcf_chain_dereference(chain->filter_chain, chain);
1066 } else if (tcf_proto_is_deleting(tp)) {
1067 /* 'deleting' flag is set and chain->filter_chain_lock was
1068 * unlocked, which means next pointer could be invalid. Restart
1069 * search.
1070 */
1071 prio = tp->prio + 1;
1072 tp = tcf_chain_dereference(chain->filter_chain, chain);
1073
1074 for (; tp; tp = tcf_chain_dereference(tp->next, chain))
1075 if (!tp->deleting && tp->prio >= prio)
1076 break;
1077 } else {
1078 tp = tcf_chain_dereference(tp->next, chain);
1079 }
1080
1081 if (tp)
1082 tcf_proto_get(tp);
1083
1084 mutex_unlock(&chain->filter_chain_lock);
1085
1086 return tp;
1087 }
1088
1089 /* Function to be used by all clients that want to iterate over all tp's on
1090 * chain. Users of this function must be tolerant to concurrent tp
1091 * insertion/deletion or ensure that no concurrent chain modification is
1092 * possible. Note that netlink dump callbacks cannot guarantee a consistent
1093 * dump because the rtnl lock is released each time the skb is filled with
1094 * data and sent to user-space.
1095 */
1096
1097 struct tcf_proto *
1098 tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp,
1099 bool rtnl_held)
1100 {
1101 struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp);
1102
1103 if (tp)
1104 tcf_proto_put(tp, rtnl_held, NULL);
1105
1106 return tp_next;
1107 }
1108 EXPORT_SYMBOL(tcf_get_next_proto);
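/* Same pattern for tp iteration (sketch); see e.g. tfilter_notify_chain()
 * below for an in-file user:
 *
 *	struct tcf_proto *tp;
 *
 *	for (tp = tcf_get_next_proto(chain, NULL, rtnl_held);
 *	     tp;
 *	     tp = tcf_get_next_proto(chain, tp, rtnl_held)) {
 *		... use tp ...
 *	}
 *
 * Breaking out early leaves a held reference that must be dropped with
 * tcf_proto_put().
 */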
1109
1110 static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held)
1111 {
1112 struct tcf_chain *chain;
1113
1114 /* Last reference to block. At this point chains cannot be added or
1115 * removed concurrently.
1116 */
1117 for (chain = tcf_get_next_chain(block, NULL);
1118 chain;
1119 chain = tcf_get_next_chain(block, chain)) {
1120 tcf_chain_put_explicitly_created(chain);
1121 tcf_chain_flush(chain, rtnl_held);
1122 }
1123 }
1124
1125 /* Look up Qdisc and increment its reference counter.
1126 * Set parent, if necessary.
1127 */
1128
1129 static int __tcf_qdisc_find(struct net *net, struct Qdisc **q,
1130 u32 *parent, int ifindex, bool rtnl_held,
1131 struct netlink_ext_ack *extack)
1132 {
1133 const struct Qdisc_class_ops *cops;
1134 struct net_device *dev;
1135 int err = 0;
1136
1137 if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
1138 return 0;
1139
1140 rcu_read_lock();
1141
1142 /* Find link */
1143 dev = dev_get_by_index_rcu(net, ifindex);
1144 if (!dev) {
1145 rcu_read_unlock();
1146 return -ENODEV;
1147 }
1148
1149 /* Find qdisc */
1150 if (!*parent) {
1151 *q = dev->qdisc;
1152 *parent = (*q)->handle;
1153 } else {
1154 *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
1155 if (!*q) {
1156 NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exist");
1157 err = -EINVAL;
1158 goto errout_rcu;
1159 }
1160 }
1161
1162 *q = qdisc_refcount_inc_nz(*q);
1163 if (!*q) {
1164 NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exist");
1165 err = -EINVAL;
1166 goto errout_rcu;
1167 }
1168
1169 /* Is it classful? */
1170 cops = (*q)->ops->cl_ops;
1171 if (!cops) {
1172 NL_SET_ERR_MSG(extack, "Qdisc not classful");
1173 err = -EINVAL;
1174 goto errout_qdisc;
1175 }
1176
1177 if (!cops->tcf_block) {
1178 NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
1179 err = -EOPNOTSUPP;
1180 goto errout_qdisc;
1181 }
1182
1183 errout_rcu:
1184 /* At this point we know that qdisc is not noop_qdisc,
1185 * which means that qdisc holds a reference to net_device
1186 * and we hold a reference to qdisc, so it is safe to release
1187 * rcu read lock.
1188 */
1189 rcu_read_unlock();
1190 return err;
1191
1192 errout_qdisc:
1193 rcu_read_unlock();
1194
1195 if (rtnl_held)
1196 qdisc_put(*q);
1197 else
1198 qdisc_put_unlocked(*q);
1199 *q = NULL;
1200
1201 return err;
1202 }
1203
1204 static int __tcf_qdisc_cl_find(struct Qdisc *q, u32 parent, unsigned long *cl,
1205 int ifindex, struct netlink_ext_ack *extack)
1206 {
1207 if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
1208 return 0;
1209
1210 /* Do we search for filter, attached to class? */
1211 if (TC_H_MIN(parent)) {
1212 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1213
1214 *cl = cops->find(q, parent);
1215 if (*cl == 0) {
1216 NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
1217 return -ENOENT;
1218 }
1219 }
1220
1221 return 0;
1222 }
1223
1224 static struct tcf_block *__tcf_block_find(struct net *net, struct Qdisc *q,
1225 unsigned long cl, int ifindex,
1226 u32 block_index,
1227 struct netlink_ext_ack *extack)
1228 {
1229 struct tcf_block *block;
1230
1231 if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
1232 block = tcf_block_refcnt_get(net, block_index);
1233 if (!block) {
1234 NL_SET_ERR_MSG(extack, "Block of given index was not found");
1235 return ERR_PTR(-EINVAL);
1236 }
1237 } else {
1238 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1239
1240 block = cops->tcf_block(q, cl, extack);
1241 if (!block)
1242 return ERR_PTR(-EINVAL);
1243
1244 if (tcf_block_shared(block)) {
1245 NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
1246 return ERR_PTR(-EOPNOTSUPP);
1247 }
1248
1249 /* Always take reference to block in order to support execution
1250 * of rules update path of cls API without rtnl lock. Caller
1251 * must release block when it is finished using it. 'if' block
1252 * of this conditional obtains a reference to the block by calling
1253 * tcf_block_refcnt_get().
1254 */
1255 refcount_inc(&block->refcnt);
1256 }
1257
1258 return block;
1259 }
1260
1261 static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
1262 struct tcf_block_ext_info *ei, bool rtnl_held)
1263 {
1264 if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) {
1265 /* Flushing/putting all chains will cause the block to be
1266 * deallocated when last chain is freed. However, if chain_list
1267 * is empty, block has to be manually deallocated. After block
1268 * reference counter reached 0, it is no longer possible to
1269 * increment it or add new chains to block.
1270 */
1271 bool free_block = list_empty(&block->chain_list);
1272
1273 mutex_unlock(&block->lock);
1274 if (tcf_block_shared(block))
1275 tcf_block_remove(block, block->net);
1276
1277 if (q)
1278 tcf_block_offload_unbind(block, q, ei);
1279
1280 if (free_block)
1281 tcf_block_destroy(block);
1282 else
1283 tcf_block_flush_all_chains(block, rtnl_held);
1284 } else if (q) {
1285 tcf_block_offload_unbind(block, q, ei);
1286 }
1287 }
1288
1289 static void tcf_block_refcnt_put(struct tcf_block *block, bool rtnl_held)
1290 {
1291 __tcf_block_put(block, NULL, NULL, rtnl_held);
1292 }
1293
1294 /* Find tcf block.
1295 * Set q, parent, cl when appropriate.
1296 */
1297
1298 static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
1299 u32 *parent, unsigned long *cl,
1300 int ifindex, u32 block_index,
1301 struct netlink_ext_ack *extack)
1302 {
1303 struct tcf_block *block;
1304 int err = 0;
1305
1306 ASSERT_RTNL();
1307
1308 err = __tcf_qdisc_find(net, q, parent, ifindex, true, extack);
1309 if (err)
1310 goto errout;
1311
1312 err = __tcf_qdisc_cl_find(*q, *parent, cl, ifindex, extack);
1313 if (err)
1314 goto errout_qdisc;
1315
1316 block = __tcf_block_find(net, *q, *cl, ifindex, block_index, extack);
1317 if (IS_ERR(block)) {
1318 err = PTR_ERR(block);
1319 goto errout_qdisc;
1320 }
1321
1322 return block;
1323
1324 errout_qdisc:
1325 if (*q)
1326 qdisc_put(*q);
1327 errout:
1328 *q = NULL;
1329 return ERR_PTR(err);
1330 }
1331
1332 static void tcf_block_release(struct Qdisc *q, struct tcf_block *block,
1333 bool rtnl_held)
1334 {
1335 if (!IS_ERR_OR_NULL(block))
1336 tcf_block_refcnt_put(block, rtnl_held);
1337
1338 if (q) {
1339 if (rtnl_held)
1340 qdisc_put(q);
1341 else
1342 qdisc_put_unlocked(q);
1343 }
1344 }
1345
1346 struct tcf_block_owner_item {
1347 struct list_head list;
1348 struct Qdisc *q;
1349 enum tcf_block_binder_type binder_type;
1350 };
1351
1352 static void
1353 tcf_block_owner_netif_keep_dst(struct tcf_block *block,
1354 struct Qdisc *q,
1355 enum tcf_block_binder_type binder_type)
1356 {
1357 if (block->keep_dst &&
1358 binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
1359 binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
1360 netif_keep_dst(qdisc_dev(q));
1361 }
1362
1363 void tcf_block_netif_keep_dst(struct tcf_block *block)
1364 {
1365 struct tcf_block_owner_item *item;
1366
1367 block->keep_dst = true;
1368 list_for_each_entry(item, &block->owner_list, list)
1369 tcf_block_owner_netif_keep_dst(block, item->q,
1370 item->binder_type);
1371 }
1372 EXPORT_SYMBOL(tcf_block_netif_keep_dst);
1373
1374 static int tcf_block_owner_add(struct tcf_block *block,
1375 struct Qdisc *q,
1376 enum tcf_block_binder_type binder_type)
1377 {
1378 struct tcf_block_owner_item *item;
1379
1380 item = kmalloc(sizeof(*item), GFP_KERNEL);
1381 if (!item)
1382 return -ENOMEM;
1383 item->q = q;
1384 item->binder_type = binder_type;
1385 list_add(&item->list, &block->owner_list);
1386 return 0;
1387 }
1388
1389 static void tcf_block_owner_del(struct tcf_block *block,
1390 struct Qdisc *q,
1391 enum tcf_block_binder_type binder_type)
1392 {
1393 struct tcf_block_owner_item *item;
1394
1395 list_for_each_entry(item, &block->owner_list, list) {
1396 if (item->q == q && item->binder_type == binder_type) {
1397 list_del(&item->list);
1398 kfree(item);
1399 return;
1400 }
1401 }
1402 WARN_ON(1);
1403 }
1404
1405 int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
1406 struct tcf_block_ext_info *ei,
1407 struct netlink_ext_ack *extack)
1408 {
1409 struct net *net = qdisc_net(q);
1410 struct tcf_block *block = NULL;
1411 int err;
1412
1413 if (ei->block_index)
1414 /* block_index not 0 means the shared block is requested */
1415 block = tcf_block_refcnt_get(net, ei->block_index);
1416
1417 if (!block) {
1418 block = tcf_block_create(net, q, ei->block_index, extack);
1419 if (IS_ERR(block))
1420 return PTR_ERR(block);
1421 if (tcf_block_shared(block)) {
1422 err = tcf_block_insert(block, net, extack);
1423 if (err)
1424 goto err_block_insert;
1425 }
1426 }
1427
1428 err = tcf_block_owner_add(block, q, ei->binder_type);
1429 if (err)
1430 goto err_block_owner_add;
1431
1432 tcf_block_owner_netif_keep_dst(block, q, ei->binder_type);
1433
1434 err = tcf_chain0_head_change_cb_add(block, ei, extack);
1435 if (err)
1436 goto err_chain0_head_change_cb_add;
1437
1438 err = tcf_block_offload_bind(block, q, ei, extack);
1439 if (err)
1440 goto err_block_offload_bind;
1441
1442 *p_block = block;
1443 return 0;
1444
1445 err_block_offload_bind:
1446 tcf_chain0_head_change_cb_del(block, ei);
1447 err_chain0_head_change_cb_add:
1448 tcf_block_owner_del(block, q, ei->binder_type);
1449 err_block_owner_add:
1450 err_block_insert:
1451 tcf_block_refcnt_put(block, true);
1452 return err;
1453 }
1454 EXPORT_SYMBOL(tcf_block_get_ext);
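/* Usage sketch for the extended getter (patterned after sch_ingress): the
 * caller fills struct tcf_block_ext_info with its binder type, an optional
 * shared block index taken from netlink, and a chain0 head-change callback
 * that lets it cache the current head of chain 0.
 *
 *	q->block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
 *	q->block_info.chain_head_change = clsact_chain_head_change;
 *	q->block_info.chain_head_change_priv = &q->miniqp;
 *
 *	return tcf_block_get_ext(&q->block, sch, &q->block_info, extack);
 */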
1455
1456 static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
1457 {
1458 struct tcf_proto __rcu **p_filter_chain = priv;
1459
1460 rcu_assign_pointer(*p_filter_chain, tp_head);
1461 }
1462
1463 int tcf_block_get(struct tcf_block **p_block,
1464 struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
1465 struct netlink_ext_ack *extack)
1466 {
1467 struct tcf_block_ext_info ei = {
1468 .chain_head_change = tcf_chain_head_change_dflt,
1469 .chain_head_change_priv = p_filter_chain,
1470 };
1471
1472 WARN_ON(!p_filter_chain);
1473 return tcf_block_get_ext(p_block, q, &ei, extack);
1474 }
1475 EXPORT_SYMBOL(tcf_block_get);
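/* Minimal usage sketch for a simple classful qdisc (patterned after
 * sch_prio/sch_multiq): the qdisc keeps a block pointer and an RCU filter
 * list head that tcf_chain_head_change_dflt() updates for it.
 *
 *	... in ->init():
 *	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
 *	if (err)
 *		return err;
 *
 *	... in ->destroy():
 *	tcf_block_put(q->block);
 */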
1476
1477 /* XXX: Standalone actions are not allowed to jump to any chain, and bound
1478 * actions should be all removed after flushing.
1479 */
1480 void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
1481 struct tcf_block_ext_info *ei)
1482 {
1483 if (!block)
1484 return;
1485 tcf_chain0_head_change_cb_del(block, ei);
1486 tcf_block_owner_del(block, q, ei->binder_type);
1487
1488 __tcf_block_put(block, q, ei, true);
1489 }
1490 EXPORT_SYMBOL(tcf_block_put_ext);
1491
1492 void tcf_block_put(struct tcf_block *block)
1493 {
1494 struct tcf_block_ext_info ei = {0, };
1495
1496 if (!block)
1497 return;
1498 tcf_block_put_ext(block, block->q, &ei);
1499 }
1500
1501 EXPORT_SYMBOL(tcf_block_put);
1502
1503 struct tcf_block_cb {
1504 struct list_head list;
1505 tc_setup_cb_t *cb;
1506 void *cb_ident;
1507 void *cb_priv;
1508 unsigned int refcnt;
1509 };
1510
1511 void *tcf_block_cb_priv(struct tcf_block_cb *block_cb)
1512 {
1513 return block_cb->cb_priv;
1514 }
1515 EXPORT_SYMBOL(tcf_block_cb_priv);
1516
1517 struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block,
1518 tc_setup_cb_t *cb, void *cb_ident)
1519 { struct tcf_block_cb *block_cb;
1520
1521 list_for_each_entry(block_cb, &block->cb_list, list)
1522 if (block_cb->cb == cb && block_cb->cb_ident == cb_ident)
1523 return block_cb;
1524 return NULL;
1525 }
1526 EXPORT_SYMBOL(tcf_block_cb_lookup);
1527
1528 void tcf_block_cb_incref(struct tcf_block_cb *block_cb)
1529 {
1530 block_cb->refcnt++;
1531 }
1532 EXPORT_SYMBOL(tcf_block_cb_incref);
1533
1534 unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
1535 {
1536 return --block_cb->refcnt;
1537 }
1538 EXPORT_SYMBOL(tcf_block_cb_decref);
1539
1540 static int
1541 tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
1542 void *cb_priv, bool add, bool offload_in_use,
1543 struct netlink_ext_ack *extack)
1544 {
1545 struct tcf_chain *chain, *chain_prev;
1546 struct tcf_proto *tp, *tp_prev;
1547 int err;
1548
1549 for (chain = __tcf_get_next_chain(block, NULL);
1550 chain;
1551 chain_prev = chain,
1552 chain = __tcf_get_next_chain(block, chain),
1553 tcf_chain_put(chain_prev)) {
1554 for (tp = __tcf_get_next_proto(chain, NULL); tp;
1555 tp_prev = tp,
1556 tp = __tcf_get_next_proto(chain, tp),
1557 tcf_proto_put(tp_prev, true, NULL)) {
1558 if (tp->ops->reoffload) {
1559 err = tp->ops->reoffload(tp, add, cb, cb_priv,
1560 extack);
1561 if (err && add)
1562 goto err_playback_remove;
1563 } else if (add && offload_in_use) {
1564 err = -EOPNOTSUPP;
1565 NL_SET_ERR_MSG(extack, "Filter HW offload failed - classifier without re-offloading support");
1566 goto err_playback_remove;
1567 }
1568 }
1569 }
1570
1571 return 0;
1572
1573 err_playback_remove:
1574 tcf_proto_put(tp, true, NULL);
1575 tcf_chain_put(chain);
1576 tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
1577 extack);
1578 return err;
1579 }
1580
1581 struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
1582 tc_setup_cb_t *cb, void *cb_ident,
1583 void *cb_priv,
1584 struct netlink_ext_ack *extack)
1585 {
1586 struct tcf_block_cb *block_cb;
1587 int err;
1588
1589 /* Replay any already present rules */
1590 err = tcf_block_playback_offloads(block, cb, cb_priv, true,
1591 tcf_block_offload_in_use(block),
1592 extack);
1593 if (err)
1594 return ERR_PTR(err);
1595
1596 block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
1597 if (!block_cb)
1598 return ERR_PTR(-ENOMEM);
1599 block_cb->cb = cb;
1600 block_cb->cb_ident = cb_ident;
1601 block_cb->cb_priv = cb_priv;
1602 list_add(&block_cb->list, &block->cb_list);
1603 return block_cb;
1604 }
1605 EXPORT_SYMBOL(__tcf_block_cb_register);
1606
1607 int tcf_block_cb_register(struct tcf_block *block,
1608 tc_setup_cb_t *cb, void *cb_ident,
1609 void *cb_priv, struct netlink_ext_ack *extack)
1610 {
1611 struct tcf_block_cb *block_cb;
1612
1613 block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv,
1614 extack);
1615 return PTR_ERR_OR_ZERO(block_cb);
1616 }
1617 EXPORT_SYMBOL(tcf_block_cb_register);
1618
1619 void __tcf_block_cb_unregister(struct tcf_block *block,
1620 struct tcf_block_cb *block_cb)
1621 {
1622 tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv,
1623 false, tcf_block_offload_in_use(block),
1624 NULL);
1625 list_del(&block_cb->list);
1626 kfree(block_cb);
1627 }
1628 EXPORT_SYMBOL(__tcf_block_cb_unregister);
1629
1630 void tcf_block_cb_unregister(struct tcf_block *block,
1631 tc_setup_cb_t *cb, void *cb_ident)
1632 {
1633 struct tcf_block_cb *block_cb;
1634
1635 block_cb = tcf_block_cb_lookup(block, cb, cb_ident);
1636 if (!block_cb)
1637 return;
1638 __tcf_block_cb_unregister(block, block_cb);
1639 }
1640 EXPORT_SYMBOL(tcf_block_cb_unregister);
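/* Driver-side sketch of per-block callback registration (hypothetical
 * foo_* names, following the common TC_SETUP_BLOCK handling in NIC
 * drivers):
 *
 *	static int foo_setup_tc_block(struct foo_priv *priv,
 *				      struct tc_block_offload *bo)
 *	{
 *		if (bo->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
 *			return -EOPNOTSUPP;
 *
 *		switch (bo->command) {
 *		case TC_BLOCK_BIND:
 *			return tcf_block_cb_register(bo->block,
 *						     foo_setup_tc_block_cb,
 *						     priv, priv, bo->extack);
 *		case TC_BLOCK_UNBIND:
 *			tcf_block_cb_unregister(bo->block,
 *						foo_setup_tc_block_cb, priv);
 *			return 0;
 *		default:
 *			return -EOPNOTSUPP;
 *		}
 *	}
 */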
1641
1642 /* Main classifier routine: scans classifier chain attached
1643 * to this qdisc, (optionally) tests for protocol and asks
1644 * specific classifiers.
1645 */
1646 int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
1647 struct tcf_result *res, bool compat_mode)
1648 {
1649 #ifdef CONFIG_NET_CLS_ACT
1650 const int max_reclassify_loop = 4;
1651 const struct tcf_proto *orig_tp = tp;
1652 const struct tcf_proto *first_tp;
1653 int limit = 0;
1654
1655 reclassify:
1656 #endif
1657 for (; tp; tp = rcu_dereference_bh(tp->next)) {
1658 __be16 protocol = tc_skb_protocol(skb);
1659 int err;
1660
1661 if (tp->protocol != protocol &&
1662 tp->protocol != htons(ETH_P_ALL))
1663 continue;
1664
1665 err = tp->classify(skb, tp, res);
1666 #ifdef CONFIG_NET_CLS_ACT
1667 if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
1668 first_tp = orig_tp;
1669 goto reset;
1670 } else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
1671 first_tp = res->goto_tp;
1672 goto reset;
1673 }
1674 #endif
1675 if (err >= 0)
1676 return err;
1677 }
1678
1679 return TC_ACT_UNSPEC; /* signal: continue lookup */
1680 #ifdef CONFIG_NET_CLS_ACT
1681 reset:
1682 if (unlikely(limit++ >= max_reclassify_loop)) {
1683 net_notice_ratelimited("%u: reclassify loop, rule prio %u, protocol %02x\n",
1684 tp->chain->block->index,
1685 tp->prio & 0xffff,
1686 ntohs(tp->protocol));
1687 return TC_ACT_SHOT;
1688 }
1689
1690 tp = first_tp;
1691 goto reclassify;
1692 #endif
1693 }
1694 EXPORT_SYMBOL(tcf_classify);
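/* Caller-side sketch (roughly how classful qdiscs classify on enqueue;
 * error-path and statistics details omitted):
 *
 *	struct tcf_proto *fl = rcu_dereference_bh(q->filter_list);
 *	struct tcf_result res;
 *	int err;
 *
 *	err = tcf_classify(skb, fl, &res, false);
 *	switch (err) {
 *	case TC_ACT_STOLEN:
 *	case TC_ACT_QUEUED:
 *	case TC_ACT_TRAP:
 *	case TC_ACT_SHOT:
 *		... packet consumed or dropped by an action ...
 *	}
 *	... otherwise res.classid (or res.class) selects the target class,
 *	    and TC_ACT_UNSPEC means no filter matched ...
 */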
1695
1696 struct tcf_chain_info {
1697 struct tcf_proto __rcu **pprev;
1698 struct tcf_proto __rcu *next;
1699 };
1700
1701 static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain,
1702 struct tcf_chain_info *chain_info)
1703 {
1704 return tcf_chain_dereference(*chain_info->pprev, chain);
1705 }
1706
1707 static int tcf_chain_tp_insert(struct tcf_chain *chain,
1708 struct tcf_chain_info *chain_info,
1709 struct tcf_proto *tp)
1710 {
1711 if (chain->flushing)
1712 return -EAGAIN;
1713
1714 if (*chain_info->pprev == chain->filter_chain)
1715 tcf_chain0_head_change(chain, tp);
1716 tcf_proto_get(tp);
1717 RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
1718 rcu_assign_pointer(*chain_info->pprev, tp);
1719
1720 return 0;
1721 }
1722
1723 static void tcf_chain_tp_remove(struct tcf_chain *chain,
1724 struct tcf_chain_info *chain_info,
1725 struct tcf_proto *tp)
1726 {
1727 struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain);
1728
1729 tcf_proto_mark_delete(tp);
1730 if (tp == chain->filter_chain)
1731 tcf_chain0_head_change(chain, next);
1732 RCU_INIT_POINTER(*chain_info->pprev, next);
1733 }
1734
1735 static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
1736 struct tcf_chain_info *chain_info,
1737 u32 protocol, u32 prio,
1738 bool prio_allocate);
1739
1740 /* Try to insert new proto.
1741 * If proto with specified priority already exists, free new proto
1742 * and return existing one.
1743 */
1744
1745 static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
1746 struct tcf_proto *tp_new,
1747 u32 protocol, u32 prio,
1748 bool rtnl_held)
1749 {
1750 struct tcf_chain_info chain_info;
1751 struct tcf_proto *tp;
1752 int err = 0;
1753
1754 mutex_lock(&chain->filter_chain_lock);
1755
1756 tp = tcf_chain_tp_find(chain, &chain_info,
1757 protocol, prio, false);
1758 if (!tp)
1759 err = tcf_chain_tp_insert(chain, &chain_info, tp_new);
1760 mutex_unlock(&chain->filter_chain_lock);
1761
1762 if (tp) {
1763 tcf_proto_destroy(tp_new, rtnl_held, NULL);
1764 tp_new = tp;
1765 } else if (err) {
1766 tcf_proto_destroy(tp_new, rtnl_held, NULL);
1767 tp_new = ERR_PTR(err);
1768 }
1769
1770 return tp_new;
1771 }
1772
1773 static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
1774 struct tcf_proto *tp, bool rtnl_held,
1775 struct netlink_ext_ack *extack)
1776 {
1777 struct tcf_chain_info chain_info;
1778 struct tcf_proto *tp_iter;
1779 struct tcf_proto **pprev;
1780 struct tcf_proto *next;
1781
1782 mutex_lock(&chain->filter_chain_lock);
1783
1784 /* Atomically find and remove tp from chain. */
1785 for (pprev = &chain->filter_chain;
1786 (tp_iter = tcf_chain_dereference(*pprev, chain));
1787 pprev = &tp_iter->next) {
1788 if (tp_iter == tp) {
1789 chain_info.pprev = pprev;
1790 chain_info.next = tp_iter->next;
1791 WARN_ON(tp_iter->deleting);
1792 break;
1793 }
1794 }
1795 /* Verify that tp still exists and no new filters were inserted
1796 * concurrently.
1797 * Mark tp for deletion if it is empty.
1798 */
1799 if (!tp_iter || !tcf_proto_check_delete(tp, rtnl_held)) {
1800 mutex_unlock(&chain->filter_chain_lock);
1801 return;
1802 }
1803
1804 next = tcf_chain_dereference(chain_info.next, chain);
1805 if (tp == chain->filter_chain)
1806 tcf_chain0_head_change(chain, next);
1807 RCU_INIT_POINTER(*chain_info.pprev, next);
1808 mutex_unlock(&chain->filter_chain_lock);
1809
1810 tcf_proto_put(tp, rtnl_held, extack);
1811 }
1812
1813 static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
1814 struct tcf_chain_info *chain_info,
1815 u32 protocol, u32 prio,
1816 bool prio_allocate)
1817 {
1818 struct tcf_proto **pprev;
1819 struct tcf_proto *tp;
1820
1821 /* Check the chain for existence of proto-tcf with this priority */
1822 for (pprev = &chain->filter_chain;
1823 (tp = tcf_chain_dereference(*pprev, chain));
1824 pprev = &tp->next) {
1825 if (tp->prio >= prio) {
1826 if (tp->prio == prio) {
1827 if (prio_allocate ||
1828 (tp->protocol != protocol && protocol))
1829 return ERR_PTR(-EINVAL);
1830 } else {
1831 tp = NULL;
1832 }
1833 break;
1834 }
1835 }
1836 chain_info->pprev = pprev;
1837 if (tp) {
1838 chain_info->next = tp->next;
1839 tcf_proto_get(tp);
1840 } else {
1841 chain_info->next = NULL;
1842 }
1843 return tp;
1844 }
1845
1846 static int tcf_fill_node(struct net *net, struct sk_buff *skb,
1847 struct tcf_proto *tp, struct tcf_block *block,
1848 struct Qdisc *q, u32 parent, void *fh,
1849 u32 portid, u32 seq, u16 flags, int event,
1850 bool rtnl_held)
1851 {
1852 struct tcmsg *tcm;
1853 struct nlmsghdr *nlh;
1854 unsigned char *b = skb_tail_pointer(skb);
1855
1856 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1857 if (!nlh)
1858 goto out_nlmsg_trim;
1859 tcm = nlmsg_data(nlh);
1860 tcm->tcm_family = AF_UNSPEC;
1861 tcm->tcm__pad1 = 0;
1862 tcm->tcm__pad2 = 0;
1863 if (q) {
1864 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1865 tcm->tcm_parent = parent;
1866 } else {
1867 tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
1868 tcm->tcm_block_index = block->index;
1869 }
1870 tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
1871 if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
1872 goto nla_put_failure;
1873 if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
1874 goto nla_put_failure;
1875 if (!fh) {
1876 tcm->tcm_handle = 0;
1877 } else {
1878 if (tp->ops->dump &&
1879 tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0)
1880 goto nla_put_failure;
1881 }
1882 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1883 return skb->len;
1884
1885 out_nlmsg_trim:
1886 nla_put_failure:
1887 nlmsg_trim(skb, b);
1888 return -1;
1889 }
1890
1891 static int tfilter_notify(struct net *net, struct sk_buff *oskb,
1892 struct nlmsghdr *n, struct tcf_proto *tp,
1893 struct tcf_block *block, struct Qdisc *q,
1894 u32 parent, void *fh, int event, bool unicast,
1895 bool rtnl_held)
1896 {
1897 struct sk_buff *skb;
1898 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1899
1900 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1901 if (!skb)
1902 return -ENOBUFS;
1903
1904 if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
1905 n->nlmsg_seq, n->nlmsg_flags, event,
1906 rtnl_held) <= 0) {
1907 kfree_skb(skb);
1908 return -EINVAL;
1909 }
1910
1911 if (unicast)
1912 return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
1913
1914 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1915 n->nlmsg_flags & NLM_F_ECHO);
1916 }
1917
1918 static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
1919 struct nlmsghdr *n, struct tcf_proto *tp,
1920 struct tcf_block *block, struct Qdisc *q,
1921 u32 parent, void *fh, bool unicast, bool *last,
1922 bool rtnl_held, struct netlink_ext_ack *extack)
1923 {
1924 struct sk_buff *skb;
1925 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1926 int err;
1927
1928 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1929 if (!skb)
1930 return -ENOBUFS;
1931
1932 if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
1933 n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
1934 rtnl_held) <= 0) {
1935 NL_SET_ERR_MSG(extack, "Failed to build del event notification");
1936 kfree_skb(skb);
1937 return -EINVAL;
1938 }
1939
1940 err = tp->ops->delete(tp, fh, last, rtnl_held, extack);
1941 if (err) {
1942 kfree_skb(skb);
1943 return err;
1944 }
1945
1946 if (unicast)
1947 return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
1948
1949 err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1950 n->nlmsg_flags & NLM_F_ECHO);
1951 if (err < 0)
1952 NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");
1953 return err;
1954 }
1955
1956 static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
1957 struct tcf_block *block, struct Qdisc *q,
1958 u32 parent, struct nlmsghdr *n,
1959 struct tcf_chain *chain, int event,
1960 bool rtnl_held)
1961 {
1962 struct tcf_proto *tp;
1963
1964 for (tp = tcf_get_next_proto(chain, NULL, rtnl_held);
1965 tp; tp = tcf_get_next_proto(chain, tp, rtnl_held))
1966 tfilter_notify(net, oskb, n, tp, block,
1967 q, parent, NULL, event, false, rtnl_held);
1968 }
1969
1970 static void tfilter_put(struct tcf_proto *tp, void *fh)
1971 {
1972 if (tp->ops->put && fh)
1973 tp->ops->put(tp, fh);
1974 }
1975
1976 static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
1977 struct netlink_ext_ack *extack)
1978 {
1979 struct net *net = sock_net(skb->sk);
1980 struct nlattr *tca[TCA_MAX + 1];
1981 struct tcmsg *t;
1982 u32 protocol;
1983 u32 prio;
1984 bool prio_allocate;
1985 u32 parent;
1986 u32 chain_index;
1987 struct Qdisc *q = NULL;
1988 struct tcf_chain_info chain_info;
1989 struct tcf_chain *chain = NULL;
1990 struct tcf_block *block;
1991 struct tcf_proto *tp;
1992 unsigned long cl;
1993 void *fh;
1994 int err;
1995 int tp_created;
1996 bool rtnl_held = false;
1997
1998 if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1999 return -EPERM;
2000
2001 replay:
2002 tp_created = 0;
2003
2004 err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
2005 if (err < 0)
2006 return err;
2007
2008 t = nlmsg_data(n);
2009 protocol = TC_H_MIN(t->tcm_info);
2010 prio = TC_H_MAJ(t->tcm_info);
2011 prio_allocate = false;
2012 parent = t->tcm_parent;
2013 tp = NULL;
2014 cl = 0;
2015 block = NULL;
2016
2017 if (prio == 0) {
2018 /* If no priority is provided by the user,
2019 * we allocate one.
2020 */
2021 if (n->nlmsg_flags & NLM_F_CREATE) {
2022 prio = TC_H_MAKE(0x80000000U, 0U);
2023 prio_allocate = true;
2024 } else {
2025 NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
2026 return -ENOENT;
2027 }
2028 }
2029
2030 /* Find head of filter chain. */
2031
2032 err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
2033 if (err)
2034 return err;
2035
2036 /* Take rtnl mutex if rtnl_held was set to true on previous iteration,
2037 * block is shared (no qdisc found), qdisc is not unlocked, classifier
2038 * type is not specified, classifier is not unlocked.
2039 */
2040 if (rtnl_held ||
2041 (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
2042 !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
2043 rtnl_held = true;
2044 rtnl_lock();
2045 }
2046
2047 err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
2048 if (err)
2049 goto errout;
2050
2051 block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
2052 extack);
2053 if (IS_ERR(block)) {
2054 err = PTR_ERR(block);
2055 goto errout;
2056 }
2057
2058 chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
2059 if (chain_index > TC_ACT_EXT_VAL_MASK) {
2060 NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
2061 err = -EINVAL;
2062 goto errout;
2063 }
2064 chain = tcf_chain_get(block, chain_index, true);
2065 if (!chain) {
2066 NL_SET_ERR_MSG(extack, "Cannot create specified filter chain");
2067 err = -ENOMEM;
2068 goto errout;
2069 }
2070
2071 mutex_lock(&chain->filter_chain_lock);
2072 tp = tcf_chain_tp_find(chain, &chain_info, protocol,
2073 prio, prio_allocate);
2074 if (IS_ERR(tp)) {
2075 NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
2076 err = PTR_ERR(tp);
2077 goto errout_locked;
2078 }
2079
2080 if (tp == NULL) {
2081 struct tcf_proto *tp_new = NULL;
2082
2083 if (chain->flushing) {
2084 err = -EAGAIN;
2085 goto errout_locked;
2086 }
2087
2088 /* Proto-tcf does not exist, create a new one */
2089
2090 if (tca[TCA_KIND] == NULL || !protocol) {
2091 NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
2092 err = -EINVAL;
2093 goto errout_locked;
2094 }
2095
2096 if (!(n->nlmsg_flags & NLM_F_CREATE)) {
2097 NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
2098 err = -ENOENT;
2099 goto errout_locked;
2100 }
2101
2102 if (prio_allocate)
2103 prio = tcf_auto_prio(tcf_chain_tp_prev(chain,
2104 &chain_info));
2105
2106 mutex_unlock(&chain->filter_chain_lock);
2107 tp_new = tcf_proto_create(nla_data(tca[TCA_KIND]),
2108 protocol, prio, chain, rtnl_held,
2109 extack);
2110 if (IS_ERR(tp_new)) {
2111 err = PTR_ERR(tp_new);
2112 goto errout_tp;
2113 }
2114
2115 tp_created = 1;
2116 tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio,
2117 rtnl_held);
2118 if (IS_ERR(tp)) {
2119 err = PTR_ERR(tp);
2120 goto errout_tp;
2121 }
2122 } else {
2123 mutex_unlock(&chain->filter_chain_lock);
2124 }
2125
2126 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
2127 NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
2128 err = -EINVAL;
2129 goto errout;
2130 }
2131
2132 fh = tp->ops->get(tp, t->tcm_handle);
2133
2134 if (!fh) {
2135 if (!(n->nlmsg_flags & NLM_F_CREATE)) {
2136 NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
2137 err = -ENOENT;
2138 goto errout;
2139 }
2140 } else if (n->nlmsg_flags & NLM_F_EXCL) {
2141 tfilter_put(tp, fh);
2142 NL_SET_ERR_MSG(extack, "Filter already exists");
2143 err = -EEXIST;
2144 goto errout;
2145 }
2146
2147 if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) {
2148 NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind");
2149 err = -EINVAL;
2150 goto errout;
2151 }
2152
2153 err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
2154 n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
2155 rtnl_held, extack);
2156 if (err == 0) {
2157 tfilter_notify(net, skb, n, tp, block, q, parent, fh,
2158 RTM_NEWTFILTER, false, rtnl_held);
2159 tfilter_put(tp, fh);
2160 }
2161
2162 errout:
2163 if (err && tp_created)
2164 tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL);
2165 errout_tp:
2166 if (chain) {
2167 if (tp && !IS_ERR(tp))
2168 tcf_proto_put(tp, rtnl_held, NULL);
2169 if (!tp_created)
2170 tcf_chain_put(chain);
2171 }
2172 tcf_block_release(q, block, rtnl_held);
2173
2174 if (rtnl_held)
2175 rtnl_unlock();
2176
2177 if (err == -EAGAIN) {
2178 /* Take rtnl lock in case EAGAIN is caused by concurrent flush
2179 * of target chain.
2180 */
2181 rtnl_held = true;
2182 /* Replay the request. */
2183 goto replay;
2184 }
2185 return err;
2186
2187 errout_locked:
2188 mutex_unlock(&chain->filter_chain_lock);
2189 goto errout;
2190 }
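/* Editor's illustrative sketch, not part of cls_api.c: how a sender is
 * expected to pack the priority/protocol pair that tc_new_tfilter() above
 * unpacks from tcm_info via TC_H_MAJ()/TC_H_MIN().  The helper name is
 * hypothetical; iproute2 builds RTM_NEWTFILTER requests the equivalent way.
 */
static inline u32 example_pack_tcm_info(u16 prio, u16 protocol)
{
	/* prio fills the major (upper 16) bits; protocol, already in network
	 * byte order (e.g. the value of htons(ETH_P_IP)), fills the minor
	 * (lower 16) bits.
	 */
	return TC_H_MAKE((u32)prio << 16, protocol);
}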
2191
2192 static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
2193 struct netlink_ext_ack *extack)
2194 {
2195 struct net *net = sock_net(skb->sk);
2196 struct nlattr *tca[TCA_MAX + 1];
2197 struct tcmsg *t;
2198 u32 protocol;
2199 u32 prio;
2200 u32 parent;
2201 u32 chain_index;
2202 struct Qdisc *q = NULL;
2203 struct tcf_chain_info chain_info;
2204 struct tcf_chain *chain = NULL;
2205 struct tcf_block *block = NULL;
2206 struct tcf_proto *tp = NULL;
2207 unsigned long cl = 0;
2208 void *fh = NULL;
2209 int err;
2210 bool rtnl_held = false;
2211
2212 if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
2213 return -EPERM;
2214
2215 err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
2216 if (err < 0)
2217 return err;
2218
2219 t = nlmsg_data(n);
2220 protocol = TC_H_MIN(t->tcm_info);
2221 prio = TC_H_MAJ(t->tcm_info);
2222 parent = t->tcm_parent;
2223
2224 if (prio == 0 && (protocol || t->tcm_handle || tca[TCA_KIND])) {
2225 NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set");
2226 return -ENOENT;
2227 }
2228
2229 /* Find head of filter chain. */
2230
2231 err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
2232 if (err)
2233 return err;
2234
2235 /* Take the rtnl mutex if flushing the whole chain, the block is shared
2236 * (no qdisc found), the qdisc is not unlocked, the classifier type is
2237 * not specified, or the classifier is not unlocked.
2238 */
2239 if (!prio ||
2240 (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
2241 !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
2242 rtnl_held = true;
2243 rtnl_lock();
2244 }
2245
2246 err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
2247 if (err)
2248 goto errout;
2249
2250 block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
2251 extack);
2252 if (IS_ERR(block)) {
2253 err = PTR_ERR(block);
2254 goto errout;
2255 }
2256
2257 chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
2258 if (chain_index > TC_ACT_EXT_VAL_MASK) {
2259 NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
2260 err = -EINVAL;
2261 goto errout;
2262 }
2263 chain = tcf_chain_get(block, chain_index, false);
2264 if (!chain) {
2265 /* User requested flush on non-existent chain. Nothing to do,
2266 * so just return success.
2267 */
2268 if (prio == 0) {
2269 err = 0;
2270 goto errout;
2271 }
2272 NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
2273 err = -ENOENT;
2274 goto errout;
2275 }
2276
2277 if (prio == 0) {
2278 tfilter_notify_chain(net, skb, block, q, parent, n,
2279 chain, RTM_DELTFILTER, rtnl_held);
2280 tcf_chain_flush(chain, rtnl_held);
2281 err = 0;
2282 goto errout;
2283 }
2284
2285 mutex_lock(&chain->filter_chain_lock);
2286 tp = tcf_chain_tp_find(chain, &chain_info, protocol,
2287 prio, false);
2288 if (!tp || IS_ERR(tp)) {
2289 NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
2290 err = tp ? PTR_ERR(tp) : -ENOENT;
2291 goto errout_locked;
2292 } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
2293 NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
2294 err = -EINVAL;
2295 goto errout_locked;
2296 } else if (t->tcm_handle == 0) {
2297 tcf_chain_tp_remove(chain, &chain_info, tp);
2298 mutex_unlock(&chain->filter_chain_lock);
2299
2300 tcf_proto_put(tp, rtnl_held, NULL);
2301 tfilter_notify(net, skb, n, tp, block, q, parent, fh,
2302 RTM_DELTFILTER, false, rtnl_held);
2303 err = 0;
2304 goto errout;
2305 }
2306 mutex_unlock(&chain->filter_chain_lock);
2307
2308 fh = tp->ops->get(tp, t->tcm_handle);
2309
2310 if (!fh) {
2311 NL_SET_ERR_MSG(extack, "Specified filter handle not found");
2312 err = -ENOENT;
2313 } else {
2314 bool last;
2315
2316 err = tfilter_del_notify(net, skb, n, tp, block,
2317 q, parent, fh, false, &last,
2318 rtnl_held, extack);
2319
2320 if (err)
2321 goto errout;
2322 if (last)
2323 tcf_chain_tp_delete_empty(chain, tp, rtnl_held, extack);
2324 }
2325
2326 errout:
2327 if (chain) {
2328 if (tp && !IS_ERR(tp))
2329 tcf_proto_put(tp, rtnl_held, NULL);
2330 tcf_chain_put(chain);
2331 }
2332 tcf_block_release(q, block, rtnl_held);
2333
2334 if (rtnl_held)
2335 rtnl_unlock();
2336
2337 return err;
2338
2339 errout_locked:
2340 mutex_unlock(&chain->filter_chain_lock);
2341 goto errout;
2342 }
2343
2344 static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
2345 struct netlink_ext_ack *extack)
2346 {
2347 struct net *net = sock_net(skb->sk);
2348 struct nlattr *tca[TCA_MAX + 1];
2349 struct tcmsg *t;
2350 u32 protocol;
2351 u32 prio;
2352 u32 parent;
2353 u32 chain_index;
2354 struct Qdisc *q = NULL;
2355 struct tcf_chain_info chain_info;
2356 struct tcf_chain *chain = NULL;
2357 struct tcf_block *block = NULL;
2358 struct tcf_proto *tp = NULL;
2359 unsigned long cl = 0;
2360 void *fh = NULL;
2361 int err;
2362 bool rtnl_held = false;
2363
2364 err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
2365 if (err < 0)
2366 return err;
2367
2368 t = nlmsg_data(n);
2369 protocol = TC_H_MIN(t->tcm_info);
2370 prio = TC_H_MAJ(t->tcm_info);
2371 parent = t->tcm_parent;
2372
2373 if (prio == 0) {
2374 NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
2375 return -ENOENT;
2376 }
2377
2378 /* Find head of filter chain. */
2379
2380 err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
2381 if (err)
2382 return err;
2383
2384 /* Take the rtnl mutex if the block is shared (no qdisc found), the qdisc
2385 * is not unlocked, the classifier type is not specified, or the
2386 * classifier is not unlocked.
2387 */
2388 if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
2389 !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
2390 rtnl_held = true;
2391 rtnl_lock();
2392 }
2393
2394 err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
2395 if (err)
2396 goto errout;
2397
2398 block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
2399 extack);
2400 if (IS_ERR(block)) {
2401 err = PTR_ERR(block);
2402 goto errout;
2403 }
2404
2405 chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
2406 if (chain_index > TC_ACT_EXT_VAL_MASK) {
2407 NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
2408 err = -EINVAL;
2409 goto errout;
2410 }
2411 chain = tcf_chain_get(block, chain_index, false);
2412 if (!chain) {
2413 NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
2414 err = -EINVAL;
2415 goto errout;
2416 }
2417
2418 mutex_lock(&chain->filter_chain_lock);
2419 tp = tcf_chain_tp_find(chain, &chain_info, protocol,
2420 prio, false);
2421 mutex_unlock(&chain->filter_chain_lock);
2422 if (!tp || IS_ERR(tp)) {
2423 NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
2424 err = tp ? PTR_ERR(tp) : -ENOENT;
2425 goto errout;
2426 } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
2427 NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
2428 err = -EINVAL;
2429 goto errout;
2430 }
2431
2432 fh = tp->ops->get(tp, t->tcm_handle);
2433
2434 if (!fh) {
2435 NL_SET_ERR_MSG(extack, "Specified filter handle not found");
2436 err = -ENOENT;
2437 } else {
2438 err = tfilter_notify(net, skb, n, tp, block, q, parent,
2439 fh, RTM_NEWTFILTER, true, rtnl_held);
2440 if (err < 0)
2441 NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
2442 }
2443
2444 tfilter_put(tp, fh);
2445 errout:
2446 if (chain) {
2447 if (tp && !IS_ERR(tp))
2448 tcf_proto_put(tp, rtnl_held, NULL);
2449 tcf_chain_put(chain);
2450 }
2451 tcf_block_release(q, block, rtnl_held);
2452
2453 if (rtnl_held)
2454 rtnl_unlock();
2455
2456 return err;
2457 }
2458
2459 struct tcf_dump_args {
2460 struct tcf_walker w;
2461 struct sk_buff *skb;
2462 struct netlink_callback *cb;
2463 struct tcf_block *block;
2464 struct Qdisc *q;
2465 u32 parent;
2466 };
2467
2468 static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
2469 {
2470 struct tcf_dump_args *a = (void *)arg;
2471 struct net *net = sock_net(a->skb->sk);
2472
2473 return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
2474 n, NETLINK_CB(a->cb->skb).portid,
2475 a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2476 RTM_NEWTFILTER, true);
2477 }
2478
2479 static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
2480 struct sk_buff *skb, struct netlink_callback *cb,
2481 long index_start, long *p_index)
2482 {
2483 struct net *net = sock_net(skb->sk);
2484 struct tcf_block *block = chain->block;
2485 struct tcmsg *tcm = nlmsg_data(cb->nlh);
2486 struct tcf_proto *tp, *tp_prev;
2487 struct tcf_dump_args arg;
2488
2489 for (tp = __tcf_get_next_proto(chain, NULL);
2490 tp;
2491 tp_prev = tp,
2492 tp = __tcf_get_next_proto(chain, tp),
2493 tcf_proto_put(tp_prev, true, NULL),
2494 (*p_index)++) {
2495 if (*p_index < index_start)
2496 continue;
2497 if (TC_H_MAJ(tcm->tcm_info) &&
2498 TC_H_MAJ(tcm->tcm_info) != tp->prio)
2499 continue;
2500 if (TC_H_MIN(tcm->tcm_info) &&
2501 TC_H_MIN(tcm->tcm_info) != tp->protocol)
2502 continue;
2503 if (*p_index > index_start)
2504 memset(&cb->args[1], 0,
2505 sizeof(cb->args) - sizeof(cb->args[0]));
2506 if (cb->args[1] == 0) {
2507 if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
2508 NETLINK_CB(cb->skb).portid,
2509 cb->nlh->nlmsg_seq, NLM_F_MULTI,
2510 RTM_NEWTFILTER, true) <= 0)
2511 goto errout;
2512 cb->args[1] = 1;
2513 }
2514 if (!tp->ops->walk)
2515 continue;
2516 arg.w.fn = tcf_node_dump;
2517 arg.skb = skb;
2518 arg.cb = cb;
2519 arg.block = block;
2520 arg.q = q;
2521 arg.parent = parent;
2522 arg.w.stop = 0;
2523 arg.w.skip = cb->args[1] - 1;
2524 arg.w.count = 0;
2525 arg.w.cookie = cb->args[2];
2526 tp->ops->walk(tp, &arg.w, true);
2527 cb->args[2] = arg.w.cookie;
2528 cb->args[1] = arg.w.count + 1;
2529 if (arg.w.stop)
2530 goto errout;
2531 }
2532 return true;
2533
2534 errout:
2535 tcf_proto_put(tp, true, NULL);
2536 return false;
2537 }
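/* Editor's illustrative sketch, not part of cls_api.c: the walker contract
 * that tcf_chain_dump() relies on.  A classifier's ->walk() honours
 * arg->skip/arg->count and stops when arg->fn() (tcf_node_dump() here)
 * returns a negative value.  The structures below are hypothetical stand-ins
 * for a real classifier's private state.
 */
struct example_walk_filter {			/* hypothetical filter entry */
	struct list_head link;
};

struct example_walk_head {			/* hypothetical tp->root state */
	struct list_head filters;
};

static void example_walk(struct tcf_proto *tp, struct tcf_walker *arg,
			 bool rtnl_held)
{
	struct example_walk_head *head = rtnl_dereference(tp->root);
	struct example_walk_filter *f;

	list_for_each_entry(f, &head->filters, link) {
		if (arg->count < arg->skip)
			goto skip;
		if (arg->fn(tp, f, arg) < 0) {
			arg->stop = 1;
			break;
		}
skip:
		arg->count++;
	}
}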
2538
2539 /* called with RTNL */
2540 static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
2541 {
2542 struct tcf_chain *chain, *chain_prev;
2543 struct net *net = sock_net(skb->sk);
2544 struct nlattr *tca[TCA_MAX + 1];
2545 struct Qdisc *q = NULL;
2546 struct tcf_block *block;
2547 struct tcmsg *tcm = nlmsg_data(cb->nlh);
2548 long index_start;
2549 long index;
2550 u32 parent;
2551 int err;
2552
2553 if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2554 return skb->len;
2555
2556 err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL,
2557 cb->extack);
2558 if (err)
2559 return err;
2560
2561 if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
2562 block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
2563 if (!block)
2564 goto out;
2565 /* If we work with a block index, q is NULL and the parent value
2566 * will never be used in the following code. The check
2567 * in tcf_fill_node prevents it. However, the compiler does not
2568 * see that far, so set parent to zero to silence the warning
2569 * about parent being uninitialized.
2570 */
2571 parent = 0;
2572 } else {
2573 const struct Qdisc_class_ops *cops;
2574 struct net_device *dev;
2575 unsigned long cl = 0;
2576
2577 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
2578 if (!dev)
2579 return skb->len;
2580
2581 parent = tcm->tcm_parent;
2582 if (!parent) {
2583 q = dev->qdisc;
2584 parent = q->handle;
2585 } else {
2586 q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
2587 }
2588 if (!q)
2589 goto out;
2590 cops = q->ops->cl_ops;
2591 if (!cops)
2592 goto out;
2593 if (!cops->tcf_block)
2594 goto out;
2595 if (TC_H_MIN(tcm->tcm_parent)) {
2596 cl = cops->find(q, tcm->tcm_parent);
2597 if (cl == 0)
2598 goto out;
2599 }
2600 block = cops->tcf_block(q, cl, NULL);
2601 if (!block)
2602 goto out;
2603 if (tcf_block_shared(block))
2604 q = NULL;
2605 }
2606
2607 index_start = cb->args[0];
2608 index = 0;
2609
2610 for (chain = __tcf_get_next_chain(block, NULL);
2611 chain;
2612 chain_prev = chain,
2613 chain = __tcf_get_next_chain(block, chain),
2614 tcf_chain_put(chain_prev)) {
2615 if (tca[TCA_CHAIN] &&
2616 nla_get_u32(tca[TCA_CHAIN]) != chain->index)
2617 continue;
2618 if (!tcf_chain_dump(chain, q, parent, skb, cb,
2619 index_start, &index)) {
2620 tcf_chain_put(chain);
2621 err = -EMSGSIZE;
2622 break;
2623 }
2624 }
2625
2626 if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
2627 tcf_block_refcnt_put(block, true);
2628 cb->args[0] = index;
2629
2630 out:
2631 /* If we made no progress, the error (EMSGSIZE) is real */
2632 if (skb->len == 0 && err)
2633 return err;
2634 return skb->len;
2635 }
2636
2637 static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops,
2638 void *tmplt_priv, u32 chain_index,
2639 struct net *net, struct sk_buff *skb,
2640 struct tcf_block *block,
2641 u32 portid, u32 seq, u16 flags, int event)
2642 {
2643 unsigned char *b = skb_tail_pointer(skb);
2644 const struct tcf_proto_ops *ops;
2645 struct nlmsghdr *nlh;
2646 struct tcmsg *tcm;
2647 void *priv;
2648
2649 ops = tmplt_ops;
2650 priv = tmplt_priv;
2651
2652 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
2653 if (!nlh)
2654 goto out_nlmsg_trim;
2655 tcm = nlmsg_data(nlh);
2656 tcm->tcm_family = AF_UNSPEC;
2657 tcm->tcm__pad1 = 0;
2658 tcm->tcm__pad2 = 0;
2659 tcm->tcm_handle = 0;
2660 if (block->q) {
2661 tcm->tcm_ifindex = qdisc_dev(block->q)->ifindex;
2662 tcm->tcm_parent = block->q->handle;
2663 } else {
2664 tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
2665 tcm->tcm_block_index = block->index;
2666 }
2667
2668 if (nla_put_u32(skb, TCA_CHAIN, chain_index))
2669 goto nla_put_failure;
2670
2671 if (ops) {
2672 if (nla_put_string(skb, TCA_KIND, ops->kind))
2673 goto nla_put_failure;
2674 if (ops->tmplt_dump(skb, net, priv) < 0)
2675 goto nla_put_failure;
2676 }
2677
2678 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
2679 return skb->len;
2680
2681 out_nlmsg_trim:
2682 nla_put_failure:
2683 nlmsg_trim(skb, b);
2684 return -EMSGSIZE;
2685 }
2686
2687 static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
2688 u32 seq, u16 flags, int event, bool unicast)
2689 {
2690 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
2691 struct tcf_block *block = chain->block;
2692 struct net *net = block->net;
2693 struct sk_buff *skb;
2694
2695 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2696 if (!skb)
2697 return -ENOBUFS;
2698
2699 if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
2700 chain->index, net, skb, block, portid,
2701 seq, flags, event) <= 0) {
2702 kfree_skb(skb);
2703 return -EINVAL;
2704 }
2705
2706 if (unicast)
2707 return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
2708
2709 return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
2710 }
2711
2712 static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
2713 void *tmplt_priv, u32 chain_index,
2714 struct tcf_block *block, struct sk_buff *oskb,
2715 u32 seq, u16 flags, bool unicast)
2716 {
2717 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
2718 struct net *net = block->net;
2719 struct sk_buff *skb;
2720
2721 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2722 if (!skb)
2723 return -ENOBUFS;
2724
2725 if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb,
2726 block, portid, seq, flags, RTM_DELCHAIN) <= 0) {
2727 kfree_skb(skb);
2728 return -EINVAL;
2729 }
2730
2731 if (unicast)
2732 return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
2733
2734 return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
2735 }
2736
2737 static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
2738 struct nlattr **tca,
2739 struct netlink_ext_ack *extack)
2740 {
2741 const struct tcf_proto_ops *ops;
2742 void *tmplt_priv;
2743
2744 /* If kind is not set, the user did not specify a template. */
2745 if (!tca[TCA_KIND])
2746 return 0;
2747
2748 ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), true, extack);
2749 if (IS_ERR(ops))
2750 return PTR_ERR(ops);
2751 if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
2752 NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier");
2753 return -EOPNOTSUPP;
2754 }
2755
2756 tmplt_priv = ops->tmplt_create(net, chain, tca, extack);
2757 if (IS_ERR(tmplt_priv)) {
2758 module_put(ops->owner);
2759 return PTR_ERR(tmplt_priv);
2760 }
2761 chain->tmplt_ops = ops;
2762 chain->tmplt_priv = tmplt_priv;
2763 return 0;
2764 }
2765
2766 static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
2767 void *tmplt_priv)
2768 {
2769 /* If template ops are not set, there is no work to do for us. */
2770 if (!tmplt_ops)
2771 return;
2772
2773 tmplt_ops->tmplt_destroy(tmplt_priv);
2774 module_put(tmplt_ops->owner);
2775 }
2776
2777 /* Add/delete/get a chain */
2778
2779 static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
2780 struct netlink_ext_ack *extack)
2781 {
2782 struct net *net = sock_net(skb->sk);
2783 struct nlattr *tca[TCA_MAX + 1];
2784 struct tcmsg *t;
2785 u32 parent;
2786 u32 chain_index;
2787 struct Qdisc *q = NULL;
2788 struct tcf_chain *chain = NULL;
2789 struct tcf_block *block;
2790 unsigned long cl;
2791 int err;
2792
2793 if (n->nlmsg_type != RTM_GETCHAIN &&
2794 !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
2795 return -EPERM;
2796
2797 replay:
2798 err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
2799 if (err < 0)
2800 return err;
2801
2802 t = nlmsg_data(n);
2803 parent = t->tcm_parent;
2804 cl = 0;
2805
2806 block = tcf_block_find(net, &q, &parent, &cl,
2807 t->tcm_ifindex, t->tcm_block_index, extack);
2808 if (IS_ERR(block))
2809 return PTR_ERR(block);
2810
2811 chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
2812 if (chain_index > TC_ACT_EXT_VAL_MASK) {
2813 NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
2814 err = -EINVAL;
2815 goto errout_block;
2816 }
2817
2818 mutex_lock(&block->lock);
2819 chain = tcf_chain_lookup(block, chain_index);
2820 if (n->nlmsg_type == RTM_NEWCHAIN) {
2821 if (chain) {
2822 if (tcf_chain_held_by_acts_only(chain)) {
2823 /* The chain exists only because there is
2824 * some action referencing it.
2825 */
2826 tcf_chain_hold(chain);
2827 } else {
2828 NL_SET_ERR_MSG(extack, "Filter chain already exists");
2829 err = -EEXIST;
2830 goto errout_block_locked;
2831 }
2832 } else {
2833 if (!(n->nlmsg_flags & NLM_F_CREATE)) {
2834 NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
2835 err = -ENOENT;
2836 goto errout_block_locked;
2837 }
2838 chain = tcf_chain_create(block, chain_index);
2839 if (!chain) {
2840 NL_SET_ERR_MSG(extack, "Failed to create filter chain");
2841 err = -ENOMEM;
2842 goto errout_block_locked;
2843 }
2844 }
2845 } else {
2846 if (!chain || tcf_chain_held_by_acts_only(chain)) {
2847 NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
2848 err = -EINVAL;
2849 goto errout_block_locked;
2850 }
2851 tcf_chain_hold(chain);
2852 }
2853
2854 if (n->nlmsg_type == RTM_NEWCHAIN) {
2855 /* Modifying chain requires holding parent block lock. In case
2856 * the chain was successfully added, take a reference to the
2857 * chain. This ensures that an empty chain does not disappear at
2858 * the end of this function.
2859 */
2860 tcf_chain_hold(chain);
2861 chain->explicitly_created = true;
2862 }
2863 mutex_unlock(&block->lock);
2864
2865 switch (n->nlmsg_type) {
2866 case RTM_NEWCHAIN:
2867 err = tc_chain_tmplt_add(chain, net, tca, extack);
2868 if (err) {
2869 tcf_chain_put_explicitly_created(chain);
2870 goto errout;
2871 }
2872
2873 tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
2874 RTM_NEWCHAIN, false);
2875 break;
2876 case RTM_DELCHAIN:
2877 tfilter_notify_chain(net, skb, block, q, parent, n,
2878 chain, RTM_DELTFILTER, true);
2879 /* Flush the chain first as the user requested chain removal. */
2880 tcf_chain_flush(chain, true);
2881 /* In case the chain was successfully deleted, put a reference
2882 * to the chain previously taken during addition.
2883 */
2884 tcf_chain_put_explicitly_created(chain);
2885 break;
2886 case RTM_GETCHAIN:
2887 err = tc_chain_notify(chain, skb, n->nlmsg_seq,
2888 n->nlmsg_seq, n->nlmsg_type, true);
2889 if (err < 0)
2890 NL_SET_ERR_MSG(extack, "Failed to send chain notify message");
2891 break;
2892 default:
2893 err = -EOPNOTSUPP;
2894 NL_SET_ERR_MSG(extack, "Unsupported message type");
2895 goto errout;
2896 }
2897
2898 errout:
2899 tcf_chain_put(chain);
2900 errout_block:
2901 tcf_block_release(q, block, true);
2902 if (err == -EAGAIN)
2903 /* Replay the request. */
2904 goto replay;
2905 return err;
2906
2907 errout_block_locked:
2908 mutex_unlock(&block->lock);
2909 goto errout_block;
2910 }
2911
2912 /* called with RTNL */
2913 static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
2914 {
2915 struct net *net = sock_net(skb->sk);
2916 struct nlattr *tca[TCA_MAX + 1];
2917 struct Qdisc *q = NULL;
2918 struct tcf_block *block;
2919 struct tcmsg *tcm = nlmsg_data(cb->nlh);
2920 struct tcf_chain *chain;
2921 long index_start;
2922 long index;
2923 u32 parent;
2924 int err;
2925
2926 if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2927 return skb->len;
2928
2929 err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
2930 cb->extack);
2931 if (err)
2932 return err;
2933
2934 if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
2935 block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
2936 if (!block)
2937 goto out;
2938 /* If we work with a block index, q is NULL and the parent value
2939 * will never be used in the following code. The check
2940 * in tcf_fill_node prevents it. However, the compiler does not
2941 * see that far, so set parent to zero to silence the warning
2942 * about parent being uninitialized.
2943 */
2944 parent = 0;
2945 } else {
2946 const struct Qdisc_class_ops *cops;
2947 struct net_device *dev;
2948 unsigned long cl = 0;
2949
2950 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
2951 if (!dev)
2952 return skb->len;
2953
2954 parent = tcm->tcm_parent;
2955 if (!parent) {
2956 q = dev->qdisc;
2957 parent = q->handle;
2958 } else {
2959 q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
2960 }
2961 if (!q)
2962 goto out;
2963 cops = q->ops->cl_ops;
2964 if (!cops)
2965 goto out;
2966 if (!cops->tcf_block)
2967 goto out;
2968 if (TC_H_MIN(tcm->tcm_parent)) {
2969 cl = cops->find(q, tcm->tcm_parent);
2970 if (cl == 0)
2971 goto out;
2972 }
2973 block = cops->tcf_block(q, cl, NULL);
2974 if (!block)
2975 goto out;
2976 if (tcf_block_shared(block))
2977 q = NULL;
2978 }
2979
2980 index_start = cb->args[0];
2981 index = 0;
2982
2983 mutex_lock(&block->lock);
2984 list_for_each_entry(chain, &block->chain_list, list) {
2985 if ((tca[TCA_CHAIN] &&
2986 nla_get_u32(tca[TCA_CHAIN]) != chain->index))
2987 continue;
2988 if (index < index_start) {
2989 index++;
2990 continue;
2991 }
2992 if (tcf_chain_held_by_acts_only(chain))
2993 continue;
2994 err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
2995 chain->index, net, skb, block,
2996 NETLINK_CB(cb->skb).portid,
2997 cb->nlh->nlmsg_seq, NLM_F_MULTI,
2998 RTM_NEWCHAIN);
2999 if (err <= 0)
3000 break;
3001 index++;
3002 }
3003 mutex_unlock(&block->lock);
3004
3005 if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
3006 tcf_block_refcnt_put(block, true);
3007 cb->args[0] = index;
3008
3009 out:
3010 /* If we made no progress, the error (EMSGSIZE) is real */
3011 if (skb->len == 0 && err)
3012 return err;
3013 return skb->len;
3014 }
3015
3016 void tcf_exts_destroy(struct tcf_exts *exts)
3017 {
3018 #ifdef CONFIG_NET_CLS_ACT
3019 tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
3020 kfree(exts->actions);
3021 exts->nr_actions = 0;
3022 #endif
3023 }
3024 EXPORT_SYMBOL(tcf_exts_destroy);
3025
3026 int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
3027 struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
3028 bool rtnl_held, struct netlink_ext_ack *extack)
3029 {
3030 #ifdef CONFIG_NET_CLS_ACT
3031 {
3032 struct tc_action *act;
3033 size_t attr_size = 0;
3034
3035 if (exts->police && tb[exts->police]) {
3036 act = tcf_action_init_1(net, tp, tb[exts->police],
3037 rate_tlv, "police", ovr,
3038 TCA_ACT_BIND, rtnl_held,
3039 extack);
3040 if (IS_ERR(act))
3041 return PTR_ERR(act);
3042
3043 act->type = exts->type = TCA_OLD_COMPAT;
3044 exts->actions[0] = act;
3045 exts->nr_actions = 1;
3046 } else if (exts->action && tb[exts->action]) {
3047 int err;
3048
3049 err = tcf_action_init(net, tp, tb[exts->action],
3050 rate_tlv, NULL, ovr, TCA_ACT_BIND,
3051 exts->actions, &attr_size,
3052 rtnl_held, extack);
3053 if (err < 0)
3054 return err;
3055 exts->nr_actions = err;
3056 }
3057 }
3058 #else
3059 if ((exts->action && tb[exts->action]) ||
3060 (exts->police && tb[exts->police])) {
3061 NL_SET_ERR_MSG(extack, "Classifier actions are not supported per compile options (CONFIG_NET_CLS_ACT)");
3062 return -EOPNOTSUPP;
3063 }
3064 #endif
3065
3066 return 0;
3067 }
3068 EXPORT_SYMBOL(tcf_exts_validate);
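/* Editor's illustrative sketch, not part of cls_api.c: the sequence a
 * classifier's ->change() typically uses around tcf_exts_validate().
 * TCA_EXAMPLE_ACT/TCA_EXAMPLE_POLICE and the helper name are hypothetical;
 * cls_basic, cls_u32, cls_flower etc. follow this pattern with their own
 * attribute ids.
 */
static int example_init_exts(struct net *net, struct tcf_proto *tp,
			     struct tcf_exts *exts, struct nlattr **tb,
			     struct nlattr *est, bool ovr,
			     struct netlink_ext_ack *extack)
{
	int err;

	err = tcf_exts_init(exts, TCA_EXAMPLE_ACT, TCA_EXAMPLE_POLICE);
	if (err < 0)
		return err;

	/* Parses TCA_*_ACT/TCA_*_POLICE and binds the actions to exts. */
	err = tcf_exts_validate(net, tp, tb, est, exts, ovr, true, extack);
	if (err < 0)
		tcf_exts_destroy(exts);
	return err;
}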
3069
3070 void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
3071 {
3072 #ifdef CONFIG_NET_CLS_ACT
3073 struct tcf_exts old = *dst;
3074
3075 *dst = *src;
3076 tcf_exts_destroy(&old);
3077 #endif
3078 }
3079 EXPORT_SYMBOL(tcf_exts_change);
3080
3081 #ifdef CONFIG_NET_CLS_ACT
3082 static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
3083 {
3084 if (exts->nr_actions == 0)
3085 return NULL;
3086 else
3087 return exts->actions[0];
3088 }
3089 #endif
3090
3091 int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
3092 {
3093 #ifdef CONFIG_NET_CLS_ACT
3094 struct nlattr *nest;
3095
3096 if (exts->action && tcf_exts_has_actions(exts)) {
3097 /*
3098 * again for backward compatible mode - we want
3099 * to work with both old and new modes of entering
3100 * tc data even if iproute2 was newer - jhs
3101 */
3102 if (exts->type != TCA_OLD_COMPAT) {
3103 nest = nla_nest_start(skb, exts->action);
3104 if (nest == NULL)
3105 goto nla_put_failure;
3106
3107 if (tcf_action_dump(skb, exts->actions, 0, 0) < 0)
3108 goto nla_put_failure;
3109 nla_nest_end(skb, nest);
3110 } else if (exts->police) {
3111 struct tc_action *act = tcf_exts_first_act(exts);
3112 nest = nla_nest_start(skb, exts->police);
3113 if (nest == NULL || !act)
3114 goto nla_put_failure;
3115 if (tcf_action_dump_old(skb, act, 0, 0) < 0)
3116 goto nla_put_failure;
3117 nla_nest_end(skb, nest);
3118 }
3119 }
3120 return 0;
3121
3122 nla_put_failure:
3123 nla_nest_cancel(skb, nest);
3124 return -1;
3125 #else
3126 return 0;
3127 #endif
3128 }
3129 EXPORT_SYMBOL(tcf_exts_dump);
3130
3131
3132 int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
3133 {
3134 #ifdef CONFIG_NET_CLS_ACT
3135 struct tc_action *a = tcf_exts_first_act(exts);
3136 if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0)
3137 return -1;
3138 #endif
3139 return 0;
3140 }
3141 EXPORT_SYMBOL(tcf_exts_dump_stats);
3142
3143 int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
3144 void *type_data, bool err_stop)
3145 {
3146 struct tcf_block_cb *block_cb;
3147 int ok_count = 0;
3148 int err;
3149
3150 /* Make sure all netdevs sharing this block are offload-capable. */
3151 if (block->nooffloaddevcnt && err_stop)
3152 return -EOPNOTSUPP;
3153
3154 list_for_each_entry(block_cb, &block->cb_list, list) {
3155 err = block_cb->cb(type, type_data, block_cb->cb_priv);
3156 if (err) {
3157 if (err_stop)
3158 return err;
3159 } else {
3160 ok_count++;
3161 }
3162 }
3163 return ok_count;
3164 }
3165 EXPORT_SYMBOL(tc_setup_cb_call);
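/* Editor's illustrative sketch, not part of cls_api.c: the shape of a driver
 * callback that tc_setup_cb_call() iterates over.  Such a callback is
 * registered on the block (typically via tcf_block_cb_register()); the
 * flower handler below is hypothetical.
 */
static int example_setup_flower(void *priv, struct tc_cls_flower_offload *f);

static int example_setup_tc_block_cb(enum tc_setup_type type,
				     void *type_data, void *cb_priv)
{
	/* cb_priv is whatever the driver passed at registration time. */
	switch (type) {
	case TC_SETUP_CLSFLOWER:
		return example_setup_flower(cb_priv, type_data);
	default:
		return -EOPNOTSUPP;
	}
}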
3166
3167 int tc_setup_flow_action(struct flow_action *flow_action,
3168 const struct tcf_exts *exts)
3169 {
3170 const struct tc_action *act;
3171 int i, j, k;
3172
3173 if (!exts)
3174 return 0;
3175
3176 j = 0;
3177 tcf_exts_for_each_action(i, act, exts) {
3178 struct flow_action_entry *entry;
3179
3180 entry = &flow_action->entries[j];
3181 if (is_tcf_gact_ok(act)) {
3182 entry->id = FLOW_ACTION_ACCEPT;
3183 } else if (is_tcf_gact_shot(act)) {
3184 entry->id = FLOW_ACTION_DROP;
3185 } else if (is_tcf_gact_trap(act)) {
3186 entry->id = FLOW_ACTION_TRAP;
3187 } else if (is_tcf_gact_goto_chain(act)) {
3188 entry->id = FLOW_ACTION_GOTO;
3189 entry->chain_index = tcf_gact_goto_chain_index(act);
3190 } else if (is_tcf_mirred_egress_redirect(act)) {
3191 entry->id = FLOW_ACTION_REDIRECT;
3192 entry->dev = tcf_mirred_dev(act);
3193 } else if (is_tcf_mirred_egress_mirror(act)) {
3194 entry->id = FLOW_ACTION_MIRRED;
3195 entry->dev = tcf_mirred_dev(act);
3196 } else if (is_tcf_vlan(act)) {
3197 switch (tcf_vlan_action(act)) {
3198 case TCA_VLAN_ACT_PUSH:
3199 entry->id = FLOW_ACTION_VLAN_PUSH;
3200 entry->vlan.vid = tcf_vlan_push_vid(act);
3201 entry->vlan.proto = tcf_vlan_push_proto(act);
3202 entry->vlan.prio = tcf_vlan_push_prio(act);
3203 break;
3204 case TCA_VLAN_ACT_POP:
3205 entry->id = FLOW_ACTION_VLAN_POP;
3206 break;
3207 case TCA_VLAN_ACT_MODIFY:
3208 entry->id = FLOW_ACTION_VLAN_MANGLE;
3209 entry->vlan.vid = tcf_vlan_push_vid(act);
3210 entry->vlan.proto = tcf_vlan_push_proto(act);
3211 entry->vlan.prio = tcf_vlan_push_prio(act);
3212 break;
3213 default:
3214 goto err_out;
3215 }
3216 } else if (is_tcf_tunnel_set(act)) {
3217 entry->id = FLOW_ACTION_TUNNEL_ENCAP;
3218 entry->tunnel = tcf_tunnel_info(act);
3219 } else if (is_tcf_tunnel_release(act)) {
3220 entry->id = FLOW_ACTION_TUNNEL_DECAP;
3221 entry->tunnel = tcf_tunnel_info(act);
3222 } else if (is_tcf_pedit(act)) {
3223 for (k = 0; k < tcf_pedit_nkeys(act); k++) {
3224 switch (tcf_pedit_cmd(act, k)) {
3225 case TCA_PEDIT_KEY_EX_CMD_SET:
3226 entry->id = FLOW_ACTION_MANGLE;
3227 break;
3228 case TCA_PEDIT_KEY_EX_CMD_ADD:
3229 entry->id = FLOW_ACTION_ADD;
3230 break;
3231 default:
3232 goto err_out;
3233 }
3234 entry->mangle.htype = tcf_pedit_htype(act, k);
3235 entry->mangle.mask = tcf_pedit_mask(act, k);
3236 entry->mangle.val = tcf_pedit_val(act, k);
3237 entry->mangle.offset = tcf_pedit_offset(act, k);
3238 entry = &flow_action->entries[++j];
3239 }
3240 } else if (is_tcf_csum(act)) {
3241 entry->id = FLOW_ACTION_CSUM;
3242 entry->csum_flags = tcf_csum_update_flags(act);
3243 } else if (is_tcf_skbedit_mark(act)) {
3244 entry->id = FLOW_ACTION_MARK;
3245 entry->mark = tcf_skbedit_mark(act);
3246 } else {
3247 goto err_out;
3248 }
3249
3250 if (!is_tcf_pedit(act))
3251 j++;
3252 }
3253 return 0;
3254 err_out:
3255 return -EOPNOTSUPP;
3256 }
3257 EXPORT_SYMBOL(tc_setup_flow_action);
3258
3259 unsigned int tcf_exts_num_actions(struct tcf_exts *exts)
3260 {
3261 unsigned int num_acts = 0;
3262 struct tc_action *act;
3263 int i;
3264
3265 tcf_exts_for_each_action(i, act, exts) {
3266 if (is_tcf_pedit(act))
3267 num_acts += tcf_pedit_nkeys(act);
3268 else
3269 num_acts++;
3270 }
3271 return num_acts;
3272 }
3273 EXPORT_SYMBOL(tcf_exts_num_actions);
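/* Editor's illustrative sketch, not part of cls_api.c: how an offload path
 * (cls_flower does the equivalent) sizes a flow_rule with
 * tcf_exts_num_actions() and fills it with tc_setup_flow_action(), assuming
 * flow_rule_alloc() from net/core/flow_offload.c is visible here.
 */
static struct flow_rule *example_exts_to_flow_rule(struct tcf_exts *exts)
{
	struct flow_rule *rule;
	int err;

	/* One flow_action entry per action; pedit keys count individually. */
	rule = flow_rule_alloc(tcf_exts_num_actions(exts));
	if (!rule)
		return ERR_PTR(-ENOMEM);

	err = tc_setup_flow_action(&rule->action, exts);
	if (err) {
		kfree(rule);
		return ERR_PTR(err);
	}
	return rule;
}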
3274
3275 static __net_init int tcf_net_init(struct net *net)
3276 {
3277 struct tcf_net *tn = net_generic(net, tcf_net_id);
3278
3279 spin_lock_init(&tn->idr_lock);
3280 idr_init(&tn->idr);
3281 return 0;
3282 }
3283
3284 static void __net_exit tcf_net_exit(struct net *net)
3285 {
3286 struct tcf_net *tn = net_generic(net, tcf_net_id);
3287
3288 idr_destroy(&tn->idr);
3289 }
3290
3291 static struct pernet_operations tcf_net_ops = {
3292 .init = tcf_net_init,
3293 .exit = tcf_net_exit,
3294 .id = &tcf_net_id,
3295 .size = sizeof(struct tcf_net),
3296 };
3297
3298 static int __init tc_filter_init(void)
3299 {
3300 int err;
3301
3302 tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
3303 if (!tc_filter_wq)
3304 return -ENOMEM;
3305
3306 err = register_pernet_subsys(&tcf_net_ops);
3307 if (err)
3308 goto err_register_pernet_subsys;
3309
3310 err = rhashtable_init(&indr_setup_block_ht,
3311 &tc_indr_setup_block_ht_params);
3312 if (err)
3313 goto err_rhash_setup_block_ht;
3314
3315 rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
3316 RTNL_FLAG_DOIT_UNLOCKED);
3317 rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
3318 RTNL_FLAG_DOIT_UNLOCKED);
3319 rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
3320 tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED);
3321 rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
3322 rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
3323 rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
3324 tc_dump_chain, 0);
3325
3326 return 0;
3327
3328 err_rhash_setup_block_ht:
3329 unregister_pernet_subsys(&tcf_net_ops);
3330 err_register_pernet_subsys:
3331 destroy_workqueue(tc_filter_wq);
3332 return err;
3333 }
3334
3335 subsys_initcall(tc_filter_init);