1 From 1a1cc3a15bba18bae3c29f4eb475aae0df9ef5bd Mon Sep 17 00:00:00 2001
2 From: Hannes Reinecke <hare@suse.de>
3 Date: Wed, 17 Sep 2008 16:47:10 +0200
4 Subject: [PATCH] pkt_action: add new action skbedit
6 This new action will have the ability to change the priority and/or
7 queue_mapping fields on an sk_buff.
9 Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
10 Signed-off-by: Hannes Reinecke <hare@suse.de>
12 Documentation/networking/multiqueue.txt | 9 ++-
13 include/linux/tc_act/Kbuild | 1 +
14 include/linux/tc_act/tc_skbedit.h | 44 +++++++
15 include/net/tc_act/tc_skbedit.h | 34 +++++
16 net/sched/Kconfig | 11 ++
17 net/sched/Makefile | 1 +
18 net/sched/act_skbedit.c | 203 +++++++++++++++++++++++++++++++
19 7 files changed, 302 insertions(+), 1 deletions(-)
20 create mode 100644 include/linux/tc_act/tc_skbedit.h
21 create mode 100644 include/net/tc_act/tc_skbedit.h
22 create mode 100644 net/sched/act_skbedit.c
24 diff --git a/Documentation/networking/multiqueue.txt b/Documentation/networking/multiqueue.txt
25 index 5787ee6..10113ff 100644
26 --- a/Documentation/networking/multiqueue.txt
27 +++ b/Documentation/networking/multiqueue.txt
28 @@ -66,7 +66,14 @@ band 3 => queue 3
29 Traffic will begin flowing through each queue if your base device has either
30 the default simple_tx_hash or a custom netdev->select_queue() defined.
32 -The behavior of tc filters remains the same.
33 +The behavior of tc filters remains the same. However, a new tc action,
34 +skbedit, has been added. Assuming you wanted to route all traffic to a
35 +specific host, for example 192.168.0.3, through a specific queue, you could use
36 +this action and establish a filter such as:
38 +tc filter add dev eth0 parent 1: protocol ip prio 1 u32 \
39 + match ip dst 192.168.0.3 \
40 + action skbedit queue_mapping 3
42 Author: Alexander Duyck <alexander.h.duyck@intel.com>
43 Original Author: Peter P. Waskiewicz Jr. <peter.p.waskiewicz.jr@intel.com>
44 diff --git a/include/linux/tc_act/Kbuild b/include/linux/tc_act/Kbuild
45 index 6dac0d7..7699093 100644
46 --- a/include/linux/tc_act/Kbuild
47 +++ b/include/linux/tc_act/Kbuild
48 @@ -3,3 +3,4 @@ header-y += tc_ipt.h
49 header-y += tc_mirred.h
50 header-y += tc_pedit.h
52 +header-y += tc_skbedit.h
53 diff --git a/include/linux/tc_act/tc_skbedit.h b/include/linux/tc_act/tc_skbedit.h
55 index 0000000..a14e461
57 +++ b/include/linux/tc_act/tc_skbedit.h
60 + * Copyright (c) 2008, Intel Corporation.
62 + * This program is free software; you can redistribute it and/or modify it
63 + * under the terms and conditions of the GNU General Public License,
64 + * version 2, as published by the Free Software Foundation.
66 + * This program is distributed in the hope it will be useful, but WITHOUT
67 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
68 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
71 + * You should have received a copy of the GNU General Public License along with
72 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
73 + * Place - Suite 330, Boston, MA 02111-1307 USA.
75 + * Author: Alexander Duyck <alexander.h.duyck@intel.com>
78 +#ifndef __LINUX_TC_SKBEDIT_H
79 +#define __LINUX_TC_SKBEDIT_H
81 +#include <linux/pkt_cls.h>
83 +#define TCA_ACT_SKBEDIT 11
85 +#define SKBEDIT_F_PRIORITY 0x1
86 +#define SKBEDIT_F_QUEUE_MAPPING 0x2
96 + TCA_SKBEDIT_PRIORITY,
97 + TCA_SKBEDIT_QUEUE_MAPPING,
100 +#define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1)
103 diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h
105 index 0000000..6abb3ed
107 +++ b/include/net/tc_act/tc_skbedit.h
110 + * Copyright (c) 2008, Intel Corporation.
112 + * This program is free software; you can redistribute it and/or modify it
113 + * under the terms and conditions of the GNU General Public License,
114 + * version 2, as published by the Free Software Foundation.
116 + * This program is distributed in the hope it will be useful, but WITHOUT
117 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
118 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
121 + * You should have received a copy of the GNU General Public License along with
122 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
123 + * Place - Suite 330, Boston, MA 02111-1307 USA.
125 + * Author: Alexander Duyck <alexander.h.duyck@intel.com>
128 +#ifndef __NET_TC_SKBEDIT_H
129 +#define __NET_TC_SKBEDIT_H
131 +#include <net/act_api.h>
133 +struct tcf_skbedit {
134 + struct tcf_common common;
139 +#define to_skbedit(pc) \
140 + container_of(pc, struct tcf_skbedit, common)
142 +#endif /* __NET_TC_SKBEDIT_H */
143 diff --git a/net/sched/Kconfig b/net/sched/Kconfig
144 index efaa7a7..6767e54 100644
145 --- a/net/sched/Kconfig
146 +++ b/net/sched/Kconfig
147 @@ -485,6 +485,17 @@ config NET_ACT_SIMP
148 To compile this code as a module, choose M here: the
149 module will be called simple.
151 +config NET_ACT_SKBEDIT
152 + tristate "SKB Editing"
153 + depends on NET_CLS_ACT
155 + Say Y here to change skb priority or queue_mapping settings.
159 + To compile this code as a module, choose M here: the
160 + module will be called skbedit.
163 bool "Incoming device classification"
164 depends on NET_CLS_U32 || NET_CLS_FW
165 diff --git a/net/sched/Makefile b/net/sched/Makefile
166 index 3d9b953..e60c992 100644
167 --- a/net/sched/Makefile
168 +++ b/net/sched/Makefile
169 @@ -14,6 +14,7 @@ obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o
170 obj-$(CONFIG_NET_ACT_NAT) += act_nat.o
171 obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o
172 obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o
173 +obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o
174 obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
175 obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
176 obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
177 diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
179 index 0000000..fe9777e
181 +++ b/net/sched/act_skbedit.c
184 + * Copyright (c) 2008, Intel Corporation.
186 + * This program is free software; you can redistribute it and/or modify it
187 + * under the terms and conditions of the GNU General Public License,
188 + * version 2, as published by the Free Software Foundation.
190 + * This program is distributed in the hope it will be useful, but WITHOUT
191 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
192 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
195 + * You should have received a copy of the GNU General Public License along with
196 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
197 + * Place - Suite 330, Boston, MA 02111-1307 USA.
199 + * Author: Alexander Duyck <alexander.h.duyck@intel.com>
202 +#include <linux/module.h>
203 +#include <linux/init.h>
204 +#include <linux/kernel.h>
205 +#include <linux/skbuff.h>
206 +#include <linux/rtnetlink.h>
207 +#include <net/netlink.h>
208 +#include <net/pkt_sched.h>
210 +#include <linux/tc_act/tc_skbedit.h>
211 +#include <net/tc_act/tc_skbedit.h>
213 +#define SKBEDIT_TAB_MASK 15
214 +static struct tcf_common *tcf_skbedit_ht[SKBEDIT_TAB_MASK + 1];
215 +static u32 skbedit_idx_gen;
216 +static DEFINE_RWLOCK(skbedit_lock);
218 +static struct tcf_hashinfo skbedit_hash_info = {
219 + .htab = tcf_skbedit_ht,
220 + .hmask = SKBEDIT_TAB_MASK,
221 + .lock = &skbedit_lock,
224 +static int tcf_skbedit(struct sk_buff *skb, struct tc_action *a,
225 + struct tcf_result *res)
227 + struct tcf_skbedit *d = a->priv;
229 + spin_lock(&d->tcf_lock);
230 + d->tcf_tm.lastuse = jiffies;
231 + d->tcf_bstats.bytes += qdisc_pkt_len(skb);
232 + d->tcf_bstats.packets++;
234 + if (d->flags & SKBEDIT_F_PRIORITY)
235 + skb->priority = d->priority;
236 + if (d->flags & SKBEDIT_F_QUEUE_MAPPING &&
237 + skb->dev->real_num_tx_queues > d->queue_mapping)
238 + skb_set_queue_mapping(skb, d->queue_mapping);
240 + spin_unlock(&d->tcf_lock);
241 + return d->tcf_action;
244 +static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
245 + [TCA_SKBEDIT_PARMS] = { .len = sizeof(struct tc_skbedit) },
246 + [TCA_SKBEDIT_PRIORITY] = { .len = sizeof(u32) },
247 + [TCA_SKBEDIT_QUEUE_MAPPING] = { .len = sizeof(u16) },
250 +static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
251 + struct tc_action *a, int ovr, int bind)
253 + struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
254 + struct tc_skbedit *parm;
255 + struct tcf_skbedit *d;
256 + struct tcf_common *pc;
257 + u32 flags = 0, *priority = NULL;
258 + u16 *queue_mapping = NULL;
264 + err = nla_parse_nested(tb, TCA_SKBEDIT_MAX, nla, skbedit_policy);
268 + if (tb[TCA_SKBEDIT_PARMS] == NULL)
271 + if (tb[TCA_SKBEDIT_PRIORITY] != NULL) {
272 + flags |= SKBEDIT_F_PRIORITY;
273 + priority = nla_data(tb[TCA_SKBEDIT_PRIORITY]);
276 + if (tb[TCA_SKBEDIT_QUEUE_MAPPING] != NULL) {
277 + flags |= SKBEDIT_F_QUEUE_MAPPING;
278 + queue_mapping = nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING]);
283 + parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
285 + pc = tcf_hash_check(parm->index, a, bind, &skbedit_hash_info);
287 + pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
288 + &skbedit_idx_gen, &skbedit_hash_info);
292 + d = to_skbedit(pc);
293 + ret = ACT_P_CREATED;
295 + d = to_skbedit(pc);
297 + tcf_hash_release(pc, bind, &skbedit_hash_info);
302 + spin_lock_bh(&d->tcf_lock);
305 + if (flags & SKBEDIT_F_PRIORITY)
306 + d->priority = *priority;
307 + if (flags & SKBEDIT_F_QUEUE_MAPPING)
308 + d->queue_mapping = *queue_mapping;
309 + d->tcf_action = parm->action;
311 + spin_unlock_bh(&d->tcf_lock);
313 + if (ret == ACT_P_CREATED)
314 + tcf_hash_insert(pc, &skbedit_hash_info);
318 +static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind)
320 + struct tcf_skbedit *d = a->priv;
323 + return tcf_hash_release(&d->common, bind, &skbedit_hash_info);
327 +static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
330 + unsigned char *b = skb_tail_pointer(skb);
331 + struct tcf_skbedit *d = a->priv;
332 + struct tc_skbedit opt;
335 + opt.index = d->tcf_index;
336 + opt.refcnt = d->tcf_refcnt - ref;
337 + opt.bindcnt = d->tcf_bindcnt - bind;
338 + opt.action = d->tcf_action;
339 + NLA_PUT(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt);
340 + if (d->flags & SKBEDIT_F_PRIORITY)
341 + NLA_PUT(skb, TCA_SKBEDIT_PRIORITY, sizeof(d->priority),
343 + if (d->flags & SKBEDIT_F_QUEUE_MAPPING)
344 + NLA_PUT(skb, TCA_SKBEDIT_QUEUE_MAPPING,
345 + sizeof(d->queue_mapping), &d->queue_mapping);
346 + t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
347 + t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
348 + t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
349 + NLA_PUT(skb, TCA_SKBEDIT_TM, sizeof(t), &t);
353 + nlmsg_trim(skb, b);
357 +static struct tc_action_ops act_skbedit_ops = {
359 + .hinfo = &skbedit_hash_info,
360 + .type = TCA_ACT_SKBEDIT,
361 + .capab = TCA_CAP_NONE,
362 + .owner = THIS_MODULE,
363 + .act = tcf_skbedit,
364 + .dump = tcf_skbedit_dump,
365 + .cleanup = tcf_skbedit_cleanup,
366 + .init = tcf_skbedit_init,
367 + .walk = tcf_generic_walker,
370 +MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>");
371 +MODULE_DESCRIPTION("SKB Editing");
372 +MODULE_LICENSE("GPL");
374 +static int __init skbedit_init_module(void)
376 + return tcf_register_action(&act_skbedit_ops);
379 +static void __exit skbedit_cleanup_module(void)
381 + tcf_unregister_action(&act_skbedit_ops);
384 +module_init(skbedit_init_module);
385 +module_exit(skbedit_cleanup_module);