net: sched: cls_bpf: add BPF-based classifier
/*
 * Berkeley Packet Filter based traffic classifier
 *
 * Might be used to classify traffic through flexible, user-defined and
 * possibly JIT-ed BPF filters for traffic control as an alternative to
 * ematches.
 *
 * (C) 2013 Daniel Borkmann <dborkman@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
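/*
 * Example usage (illustrative only; the exact iproute2 syntax is an
 * assumption and may differ between versions): attach a one-instruction
 * classic BPF program on an assumed device eth0 and map matches to 1:1:
 *
 *   tc filter add dev eth0 parent 1: bpf bytecode '1,6 0 0 4294967295,' flowid 1:1
 *
 * The single instruction returns -1, so the class id configured via flowid
 * is used for every packet. User space carries the instructions, their
 * count and the default class id in the TCA_BPF_OPS, TCA_BPF_OPS_LEN and
 * TCA_BPF_CLASSID netlink attributes handled below.
 */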

#include <linux/module.h>
#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/filter.h>
#include <net/rtnetlink.h>
#include <net/pkt_cls.h>
#include <net/sock.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
MODULE_DESCRIPTION("TC BPF based classifier");

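/* Per-classifier state: the list of installed BPF programs and a counter
 * used to auto-generate handles when the user does not supply one.
 */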
struct cls_bpf_head {
        struct list_head plist;
        u32 hgen;
};

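/* One installed program: the (possibly JITed) filter that is run on each
 * packet, a copy of the raw instructions kept for dumping back to user
 * space, the bound class and extended actions, the list linkage and the
 * user-visible handle.
 */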
struct cls_bpf_prog {
        struct sk_filter *filter;
        struct sock_filter *bpf_ops;
        struct tcf_exts exts;
        struct tcf_result res;
        struct list_head link;
        u32 handle;
        u16 bpf_len;
};

static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
        [TCA_BPF_CLASSID] = { .type = NLA_U32 },
        [TCA_BPF_OPS_LEN] = { .type = NLA_U16 },
        [TCA_BPF_OPS]     = { .type = NLA_BINARY,
                              .len = sizeof(struct sock_filter) * BPF_MAXINSNS },
};

static const struct tcf_ext_map bpf_ext_map = {
        .action = TCA_BPF_ACT,
        .police = TCA_BPF_POLICE,
};

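/* Run each program against the skb in list order. A filter return value of
 * 0 means no match, so the next program is tried; -1 selects the default
 * class id configured for the program; any other value is used directly as
 * the class id. Extended actions are executed for the matching program, and
 * a negative action result also falls through to the next program.
 */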
static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
                            struct tcf_result *res)
{
        struct cls_bpf_head *head = tp->root;
        struct cls_bpf_prog *prog;
        int ret;

        list_for_each_entry(prog, &head->plist, link) {
                int filter_res = SK_RUN_FILTER(prog->filter, skb);

                if (filter_res == 0)
                        continue;

                *res = prog->res;
                if (filter_res != -1)
                        res->classid = filter_res;

                ret = tcf_exts_exec(skb, &prog->exts, res);
                if (ret < 0)
                        continue;

                return ret;
        }

        return -1;
}

static int cls_bpf_init(struct tcf_proto *tp)
{
        struct cls_bpf_head *head;

        head = kzalloc(sizeof(*head), GFP_KERNEL);
        if (head == NULL)
                return -ENOBUFS;

        INIT_LIST_HEAD(&head->plist);
        tp->root = head;

        return 0;
}

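/* Release everything a program owns: unbind it from its class, destroy its
 * extended actions and its unattached socket filter, and free the copied
 * BPF instructions.
 */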
static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog)
{
        tcf_unbind_filter(tp, &prog->res);
        tcf_exts_destroy(tp, &prog->exts);

        sk_unattached_filter_destroy(prog->filter);

        kfree(prog->bpf_ops);
        kfree(prog);
}

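/* Unlink a single program from the list under the tcf tree lock and free
 * it; returns -ENOENT if the program is not part of this classifier.
 */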
static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
{
        struct cls_bpf_head *head = tp->root;
        struct cls_bpf_prog *prog, *todel = (struct cls_bpf_prog *) arg;

        list_for_each_entry(prog, &head->plist, link) {
                if (prog == todel) {
                        tcf_tree_lock(tp);
                        list_del(&prog->link);
                        tcf_tree_unlock(tp);

                        cls_bpf_delete_prog(tp, prog);
                        return 0;
                }
        }

        return -ENOENT;
}

static void cls_bpf_destroy(struct tcf_proto *tp)
{
        struct cls_bpf_head *head = tp->root;
        struct cls_bpf_prog *prog, *tmp;

        list_for_each_entry_safe(prog, tmp, &head->plist, link) {
                list_del(&prog->link);
                cls_bpf_delete_prog(tp, prog);
        }

        kfree(head);
}

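/* Look up a program by its handle and hand it to the tc core as an opaque
 * reference, or 0 if no program with that handle exists.
 */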
static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
{
        struct cls_bpf_head *head = tp->root;
        struct cls_bpf_prog *prog;
        unsigned long ret = 0UL;

        if (head == NULL)
                return 0UL;

        list_for_each_entry(prog, &head->plist, link) {
                if (prog->handle == handle) {
                        ret = (unsigned long) prog;
                        break;
                }
        }

        return ret;
}

static void cls_bpf_put(struct tcf_proto *tp, unsigned long f)
{
}

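/* Validate the attributes, copy the BPF instructions from the netlink
 * message, build an unattached socket filter from them and swap the new
 * filter into the program under the tcf tree lock. The previous filter and
 * instruction copy, if any, are released after the swap.
 */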
static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
                                   struct cls_bpf_prog *prog,
                                   unsigned long base, struct nlattr **tb,
                                   struct nlattr *est)
{
        struct sock_filter *bpf_ops, *bpf_old;
        struct tcf_exts exts;
        struct sock_fprog tmp;
        struct sk_filter *fp, *fp_old;
        u16 bpf_size, bpf_len;
        u32 classid;
        int ret;

        if (!tb[TCA_BPF_OPS_LEN] || !tb[TCA_BPF_OPS] || !tb[TCA_BPF_CLASSID])
                return -EINVAL;

        ret = tcf_exts_validate(net, tp, tb, est, &exts, &bpf_ext_map);
        if (ret < 0)
                return ret;

        classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
        bpf_len = nla_get_u16(tb[TCA_BPF_OPS_LEN]);
        if (bpf_len > BPF_MAXINSNS || bpf_len == 0) {
                ret = -EINVAL;
                goto errout;
        }

        bpf_size = bpf_len * sizeof(*bpf_ops);
        bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
        if (bpf_ops == NULL) {
                ret = -ENOMEM;
                goto errout;
        }

        memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size);

        tmp.len = bpf_len;
        tmp.filter = (struct sock_filter __user *) bpf_ops;

        ret = sk_unattached_filter_create(&fp, &tmp);
        if (ret)
                goto errout_free;

        tcf_tree_lock(tp);
        fp_old = prog->filter;
        bpf_old = prog->bpf_ops;

        prog->bpf_len = bpf_len;
        prog->bpf_ops = bpf_ops;
        prog->filter = fp;
        prog->res.classid = classid;
        tcf_tree_unlock(tp);

        tcf_bind_filter(tp, &prog->res, base);
        tcf_exts_change(tp, &prog->exts, &exts);

        if (fp_old)
                sk_unattached_filter_destroy(fp_old);
        if (bpf_old)
                kfree(bpf_old);

        return 0;

errout_free:
        kfree(bpf_ops);
errout:
        tcf_exts_destroy(tp, &exts);
        return ret;
}

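/* Auto-generate a handle for a new program: advance the per-head counter,
 * skipping values that are already in use, with the number of attempts
 * bounded so the loop terminates even if the handle space is exhausted.
 */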
static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
                                   struct cls_bpf_head *head)
{
        unsigned int i = 0x80000000;

        do {
                if (++head->hgen == 0x7FFFFFFF)
                        head->hgen = 1;
        } while (--i > 0 && cls_bpf_get(tp, head->hgen));
        if (i == 0) {
                pr_err("Insufficient number of handles\n");
                return 0;
        }

        return head->hgen;
}

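/* Create or update a program. If the tc core passed in an existing program
 * (*arg != 0), only its filter and attributes are replaced; otherwise a new
 * program is allocated, given a handle (user-supplied or auto-generated)
 * and linked into the list under the tcf tree lock.
 */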
static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
                          struct tcf_proto *tp, unsigned long base,
                          u32 handle, struct nlattr **tca,
                          unsigned long *arg)
{
        struct cls_bpf_head *head = tp->root;
        struct cls_bpf_prog *prog = (struct cls_bpf_prog *) *arg;
        struct nlattr *tb[TCA_BPF_MAX + 1];
        int ret;

        if (tca[TCA_OPTIONS] == NULL)
                return -EINVAL;

        ret = nla_parse_nested(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], bpf_policy);
        if (ret < 0)
                return ret;

        if (prog != NULL) {
                if (handle && prog->handle != handle)
                        return -EINVAL;
                return cls_bpf_modify_existing(net, tp, prog, base, tb,
                                               tca[TCA_RATE]);
        }

        prog = kzalloc(sizeof(*prog), GFP_KERNEL);
        if (prog == NULL)
                return -ENOBUFS;

        if (handle == 0)
                prog->handle = cls_bpf_grab_new_handle(tp, head);
        else
                prog->handle = handle;
        if (prog->handle == 0) {
                ret = -EINVAL;
                goto errout;
        }

        ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE]);
        if (ret < 0)
                goto errout;

        tcf_tree_lock(tp);
        list_add(&prog->link, &head->plist);
        tcf_tree_unlock(tp);

        *arg = (unsigned long) prog;

        return 0;
errout:
        if (*arg == 0UL && prog)
                kfree(prog);

        return ret;
}

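/* Dump a program to user space: the class id, instruction count and raw
 * BPF instructions are emitted as nested TCA_BPF_* attributes, followed by
 * the extended action attributes and their statistics.
 */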
static int cls_bpf_dump(struct tcf_proto *tp, unsigned long fh,
                        struct sk_buff *skb, struct tcmsg *tm)
{
        struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh;
        struct nlattr *nest, *nla;

        if (prog == NULL)
                return skb->len;

        tm->tcm_handle = prog->handle;

        nest = nla_nest_start(skb, TCA_OPTIONS);
        if (nest == NULL)
                goto nla_put_failure;

        if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid))
                goto nla_put_failure;
        if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_len))
                goto nla_put_failure;

        nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_len *
                          sizeof(struct sock_filter));
        if (nla == NULL)
                goto nla_put_failure;

        memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla));

        if (tcf_exts_dump(skb, &prog->exts, &bpf_ext_map) < 0)
                goto nla_put_failure;

        nla_nest_end(skb, nest);

        if (tcf_exts_dump_stats(skb, &prog->exts, &bpf_ext_map) < 0)
                goto nla_put_failure;

        return skb->len;

nla_put_failure:
        nla_nest_cancel(skb, nest);
        return -1;
}

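/* Iterate over all programs for the tc core, honouring the walker's skip
 * count and stopping early when the callback reports an error.
 */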
static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
        struct cls_bpf_head *head = tp->root;
        struct cls_bpf_prog *prog;

        list_for_each_entry(prog, &head->plist, link) {
                if (arg->count < arg->skip)
                        goto skip;
                if (arg->fn(tp, (unsigned long) prog, arg) < 0) {
                        arg->stop = 1;
                        break;
                }
skip:
                arg->count++;
        }
}

static struct tcf_proto_ops cls_bpf_ops __read_mostly = {
        .kind     = "bpf",
        .owner    = THIS_MODULE,
        .classify = cls_bpf_classify,
        .init     = cls_bpf_init,
        .destroy  = cls_bpf_destroy,
        .get      = cls_bpf_get,
        .put      = cls_bpf_put,
        .change   = cls_bpf_change,
        .delete   = cls_bpf_delete,
        .walk     = cls_bpf_walk,
        .dump     = cls_bpf_dump,
};

static int __init cls_bpf_init_mod(void)
{
        return register_tcf_proto_ops(&cls_bpf_ops);
}

static void __exit cls_bpf_exit_mod(void)
{
        unregister_tcf_proto_ops(&cls_bpf_ops);
}

module_init(cls_bpf_init_mod);
module_exit(cls_bpf_exit_mod);