[people/ms/linux.git] / kernel / bpf / syscall.c

/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>

/* list of registered map implementations: bpf_register_map_type() adds
 * entries, find_and_alloc_map() walks it to match a requested map type
 */
static LIST_HEAD(bpf_map_types);

/* Walk the registered map types looking for attr->map_type and let the
 * matching implementation allocate the map.
 *
 * Returns the new map with ->ops and ->map_type filled in, the ERR_PTR()
 * handed back by ->map_alloc() when allocation failed, or ERR_PTR(-EINVAL)
 * when no registered type matches.
 */
static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map_type_list *entry;
	struct bpf_map *new_map;

	list_for_each_entry(entry, &bpf_map_types, list_node) {
		if (entry->type != attr->map_type)
			continue;

		new_map = entry->ops->map_alloc(attr);
		if (!IS_ERR(new_map)) {
			/* remember which implementation owns this map */
			new_map->ops = entry->ops;
			new_map->map_type = attr->map_type;
		}
		return new_map;
	}

	return ERR_PTR(-EINVAL);
}

/* boot time registration of different map implementations:
 * links 'tl' into the bpf_map_types list so that find_and_alloc_map()
 * can match it by tl->type.
 *
 * NOTE(review): no lock is taken around the list update; presumably all
 * callers run serialized during boot - confirm.
 */
void bpf_register_map_type(struct bpf_map_type_list *tl)
{
	list_add(&tl->list_node, &bpf_map_types);
}

/* called from workqueue: work item queued by bpf_map_put() once the last
 * reference on the map has been dropped
 */
static void bpf_map_free_deferred(struct work_struct *work)
{
	/* the work item is embedded in the map, recover the enclosing map */
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	/* implementation dependent freeing */
	map->ops->map_free(map);
}

/* Drop one reference on the map. Once the last reference is gone the map
 * is not freed inline: freeing is handed to a workqueue, because the
 * underlying map implementation's ops->map_free() might sleep.
 */
void bpf_map_put(struct bpf_map *map)
{
	/* somebody else still holds a reference, nothing more to do */
	if (!atomic_dec_and_test(&map->refcnt))
		return;

	INIT_WORK(&map->work, bpf_map_free_deferred);
	schedule_work(&map->work);
}

/* ->release() of a map file: the map was stored in ->private_data when the
 * file was created, drop that reference. Always returns 0.
 */
static int bpf_map_release(struct inode *inode, struct file *filp)
{
	bpf_map_put(filp->private_data);
	return 0;
}

/* file operations of the anon inode file created by map_create().
 * Only ->release is provided; bpf_map_get() also compares a file's f_op
 * against this table to tell whether an fd refers to a map.
 */
static const struct file_operations bpf_map_fops = {
	.release = bpf_map_release,
};

/* helper macro to check that unused fields of 'union bpf_attr' are zero
 *
 * Expects a 'union bpf_attr *attr' in the caller's scope and a
 * CMD##_LAST_FIELD macro naming the last field the command uses.
 * Scans every byte of *attr located after that field and evaluates to
 * true when the scan reports a byte different from zero.
 *
 * The expansion is fully parenthesized so that it stays a single operand
 * wherever it is used (e.g. under '!' or next to '&&').
 */
#define CHECK_ATTR(CMD) \
	(memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		    sizeof(attr->CMD##_LAST_FIELD), 0, \
		    sizeof(*attr) - \
		    offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		    sizeof(attr->CMD##_LAST_FIELD)) != NULL)

#define BPF_MAP_CREATE_LAST_FIELD max_entries
/* called via syscall (BPF_MAP_CREATE command)
 *
 * Allocates a map of the requested type and wraps it into an anon inode
 * file descriptor.
 *
 * Returns the new fd on success. Returns -EINVAL when fields past
 * max_entries are not zero, the error encoded by find_and_alloc_map(), or
 * the negative value returned by anon_inode_getfd(); in the last case the
 * freshly allocated map is freed again.
 */
static int map_create(union bpf_attr *attr)
{
	struct bpf_map *map;
	int fd;

	if (CHECK_ATTR(BPF_MAP_CREATE))
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	/* this single reference is dropped by bpf_map_release() */
	atomic_set(&map->refcnt, 1);

	fd = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR | O_CLOEXEC);
	if (fd < 0)
		/* failed to allocate fd, nobody else can see the map yet */
		map->ops->map_free(map);

	return fd;
}

/* Translate an already looked up 'struct fd' into the map behind it.
 *
 * Returns ERR_PTR(-EBADF) when 'f' carries no file, ERR_PTR(-EINVAL) when
 * the file is not a bpf map file; in the latter case fdput() has already
 * been called here. On success the caller should complete fd access with
 * a matching fdput().
 */
struct bpf_map *bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);

	/* map files are recognized by their file_operations table */
	if (f.file->f_op == &bpf_map_fops)
		return f.file->private_data;

	fdput(f);
	return ERR_PTR(-EINVAL);
}

/* helper to convert user pointers passed inside __aligned_u64 fields
 *
 * The value is first converted to unsigned long and only then to a
 * pointer, so the pointer cast always starts from an integer of type
 * unsigned long rather than from a 64-bit value.
 */
static void __user *u64_to_ptr(__u64 val)
{
	return (void __user *) (unsigned long) val;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

/* BPF_MAP_LOOKUP_ELEM: look up the element stored under the user supplied
 * key and copy its value back to user space.
 *
 * attr->map_fd selects the map, attr->key and attr->value carry user
 * pointers to buffers of map->key_size and map->value_size bytes.
 *
 * Returns 0 on success, -EINVAL if fields past 'value' are not zero, the
 * error reported by bpf_map_get() for a bad fd, -ENOMEM if a bounce buffer
 * cannot be allocated, -EFAULT if a user copy fails and -ENOENT if the
 * key is not present in the map.
 */
static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *uvalue = u64_to_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	/* grab the fd only after the attributes were validated: the early
	 * return above has no fdput() and must not leave a reference behind
	 */
	f = fdget(ufd);
	map = bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	value = kmalloc(map->value_size, GFP_USER);
	if (!value)
		goto free_key;

	/* the element is copied into 'value' while still inside the rcu
	 * read section; the copy to user space happens after leaving it
	 */
	rcu_read_lock();
	ptr = map->ops->map_lookup_elem(map, key);
	if (ptr)
		memcpy(value, ptr, map->value_size);
	rcu_read_unlock();

	err = -ENOENT;
	if (!ptr)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, map->value_size) != 0)
		goto free_value;

	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

/* BPF_MAP_UPDATE_ELEM: copy a key/value pair from user space and hand it
 * to the map implementation together with attr->flags.
 *
 * attr->map_fd selects the map, attr->key and attr->value carry user
 * pointers to buffers of map->key_size and map->value_size bytes.
 *
 * Returns the result of ops->map_update_elem() when it is reached,
 * -EINVAL if fields past 'flags' are not zero, the error reported by
 * bpf_map_get() for a bad fd, -ENOMEM if a bounce buffer cannot be
 * allocated and -EFAULT if a user copy fails.
 */
static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *uvalue = u64_to_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	/* grab the fd only after the attributes were validated: the early
	 * return above has no fdput() and must not leave a reference behind
	 */
	f = fdget(ufd);
	map = bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	value = kmalloc(map->value_size, GFP_USER);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, map->value_size) != 0)
		goto free_value;

	/* eBPF program that use maps are running under rcu_read_lock(),
	 * therefore all map accessors rely on this fact, so do the same here
	 */
	rcu_read_lock();
	err = map->ops->map_update_elem(map, key, value, attr->flags);
	rcu_read_unlock();

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

/* BPF_MAP_DELETE_ELEM: copy a key from user space and ask the map
 * implementation to delete the matching element.
 *
 * attr->map_fd selects the map, attr->key carries a user pointer to a
 * buffer of map->key_size bytes.
 *
 * Returns the result of ops->map_delete_elem() when it is reached,
 * -EINVAL if fields past 'key' are not zero, the error reported by
 * bpf_map_get() for a bad fd, -ENOMEM if the key buffer cannot be
 * allocated and -EFAULT if the key cannot be copied in.
 */
static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	/* grab the fd only after the attributes were validated: the early
	 * return above has no fdput() and must not leave a reference behind
	 */
	f = fdget(ufd);
	map = bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	/* map accessors are called inside an rcu read section, like in the
	 * other map commands of this file
	 */
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();

free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

/* BPF_MAP_GET_NEXT_KEY: copy a key from user space, ask the map
 * implementation for the key following it and copy that one back.
 *
 * attr->map_fd selects the map, attr->key and attr->next_key carry user
 * pointers to buffers of map->key_size bytes each.
 *
 * Returns 0 on success, a non-zero result of ops->map_get_next_key()
 * unchanged, -EINVAL if fields past 'next_key' are not zero, the error
 * reported by bpf_map_get() for a bad fd, -ENOMEM if a bounce buffer
 * cannot be allocated and -EFAULT if a user copy fails.
 */
static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *unext_key = u64_to_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	/* grab the fd only after the attributes were validated: the early
	 * return above has no fdput() and must not leave a reference behind
	 */
	f = fdget(ufd);
	map = bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* list of registered program types: bpf_register_prog_type() adds entries,
 * find_prog_type() walks it to match the type requested at load time
 */
static LIST_HEAD(bpf_prog_types);

/* Bind 'prog' to the registered program type 'type': on a match the ops
 * of that type are stored in prog->aux->ops and prog->type is set.
 *
 * Returns 0 on a match and -EINVAL when the type was never registered.
 */
static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	struct bpf_prog_type_list *entry;

	list_for_each_entry(entry, &bpf_prog_types, list_node) {
		if (entry->type != type)
			continue;

		prog->aux->ops = entry->ops;
		prog->type = type;
		return 0;
	}

	return -EINVAL;
}

/* registration of a program type: links 'tl' into the bpf_prog_types list
 * so that find_prog_type() can match it by tl->type.
 *
 * NOTE(review): no lock is taken around the list update; presumably all
 * callers run serialized during boot - confirm.
 */
void bpf_register_prog_type(struct bpf_prog_type_list *tl)
{
	list_add(&tl->list_node, &bpf_prog_types);
}

/* Rewrite the imm field of every bpf_call instruction from a function id
 * into an offset relative to __bpf_call_base:
 *
 * if (insn->imm == BPF_FUNC_map_lookup_elem)
 *      insn->imm = bpf_map_lookup_elem - __bpf_call_base;
 * else if (insn->imm == BPF_FUNC_map_update_elem)
 *      insn->imm = bpf_map_update_elem - __bpf_call_base;
 * else ...
 *
 * this function is called after eBPF program passed verification
 */
static void fixup_bpf_calls(struct bpf_prog *prog)
{
	const struct bpf_func_proto *proto;
	struct bpf_insn *insn;
	int idx;

	for (idx = 0; idx < prog->len; idx++) {
		insn = &prog->insnsi[idx];

		/* only call instructions need patching */
		if (insn->code != (BPF_JMP | BPF_CALL))
			continue;

		/* a program with bpf_call instructions that passed
		 * bpf_check() implies that ops->get_func_proto was
		 * supplied, check it
		 */
		BUG_ON(!prog->aux->ops->get_func_proto);

		proto = prog->aux->ops->get_func_proto(insn->imm);

		/* every function that has a prototype and that the verifier
		 * allowed programs to call must be a real in-kernel function
		 */
		BUG_ON(!proto->func);
		insn->imm = proto->func - __bpf_call_base;
	}
}

/* Release everything recorded in aux->used_maps: drop one reference on
 * each of the aux->used_map_cnt maps used by the eBPF program, then free
 * the auxiliary array itself.
 */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int idx;

	for (idx = 0; idx < aux->used_map_cnt; idx++) {
		bpf_map_put(aux->used_maps[idx]);
	}

	kfree(aux->used_maps);
}

/* Drop one reference on the program. When the last reference goes away
 * the references on the maps it uses are dropped and the program itself
 * is freed.
 */
void bpf_prog_put(struct bpf_prog *prog)
{
	/* other holders remain, keep the program alive */
	if (!atomic_dec_and_test(&prog->aux->refcnt))
		return;

	free_used_maps(prog->aux);
	bpf_prog_free(prog);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

/* ->release() of a program file: the program was stored in ->private_data
 * when the file was created, drop that reference. Always returns 0.
 */
static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	bpf_prog_put(filp->private_data);
	return 0;
}

/* file operations of the anon inode file created by bpf_prog_load().
 * Only ->release is provided; get_prog() also compares a file's f_op
 * against this table to tell whether an fd refers to a program.
 */
static const struct file_operations bpf_prog_fops = {
	.release = bpf_prog_release,
};

/* Translate an already looked up 'struct fd' into the program behind it.
 *
 * Returns ERR_PTR(-EBADF) when 'f' carries no file, ERR_PTR(-EINVAL) when
 * the file is not a bpf program file; in the latter case fdput() has
 * already been called here. On success no fdput() is done, that is left
 * to the caller.
 */
static struct bpf_prog *get_prog(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);

	/* program files are recognized by their file_operations table */
	if (f.file->f_op == &bpf_prog_fops)
		return f.file->private_data;

	fdput(f);
	return ERR_PTR(-EINVAL);
}

/* called by sockets/tracing/seccomp before attaching program to an event
 * pairs with bpf_prog_put()
 *
 * Looks up 'ufd' and, when it refers to a bpf program, takes an extra
 * reference on that program before letting go of the fd. Returns the
 * program, or the ERR_PTR() produced by get_prog().
 */
struct bpf_prog *bpf_prog_get(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog = get_prog(f);

	if (!IS_ERR(prog)) {
		/* pin the program first, then release the fd */
		atomic_inc(&prog->aux->refcnt);
		fdput(f);
	}

	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get);

/* last field in 'union bpf_attr' used by this command */
#define	BPF_PROG_LOAD_LAST_FIELD log_buf

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt >= BPF_MAXINSNS)
		return -EINVAL;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_ptr(attr->insns),
			   prog->len * sizeof(struct bpf_insn)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = false;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl;

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* fixup BPF_CALL->imm field */
	fixup_bpf_calls(prog);

	/* eBPF program is ready to be JITed */
	bpf_prog_select_runtime(prog);

	err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC);
	if (err < 0)
		/* failed to allocate fd */
		goto free_used_maps;

	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_free(prog);
	return err;
}

/* bpf() syscall entry point: validates the caller and the attribute block
 * in user memory, copies at most sizeof(union bpf_attr) bytes of it into
 * a zero initialized local copy and dispatches on 'cmd'.
 *
 * Returns the result of the selected command handler, -EPERM without
 * CAP_SYS_ADMIN, -EFAULT if 'uattr' cannot be read, -E2BIG if 'size' is
 * above PAGE_SIZE or bytes beyond the known structure are non-zero, and
 * -EINVAL for an unknown command.
 */
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	/* the syscall is limited to root temporarily. This restriction will be
	 * lifted when security audit is clean. Note that eBPF+tracing must have
	 * this restriction, since it may pass kernel data to user space
	 */
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!access_ok(VERIFY_READ, uattr, 1))
		return -EFAULT;

	if (size > PAGE_SIZE)	/* silly large */
		return -E2BIG;

	/* If we're handed a bigger struct than we know of,
	 * ensure all the unknown bits are 0 - i.e. new
	 * user-space does not rely on any kernel feature
	 * extensions we don't know about yet.
	 */
	if (size > sizeof(attr)) {
		unsigned char __user *pos = (void __user *)uattr + sizeof(attr);
		unsigned char __user *limit = (void __user *)uattr + size;
		unsigned char byte;

		while (pos < limit) {
			err = get_user(byte, pos);
			if (err)
				return err;
			if (byte)
				return -E2BIG;
			pos++;
		}

		/* the tail is all zero, only the known part gets copied */
		size = sizeof(attr);
	}

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	switch (cmd) {
	case BPF_MAP_CREATE:
		return map_create(&attr);
	case BPF_MAP_LOOKUP_ELEM:
		return map_lookup_elem(&attr);
	case BPF_MAP_UPDATE_ELEM:
		return map_update_elem(&attr);
	case BPF_MAP_DELETE_ELEM:
		return map_delete_elem(&attr);
	case BPF_MAP_GET_NEXT_KEY:
		return map_get_next_key(&attr);
	case BPF_PROG_LOAD:
		return bpf_prog_load(&attr);
	default:
		return -EINVAL;
	}
}
Commit	Line	Data
99c55f7d AS	1	/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
	2	*
	3	* This program is free software; you can redistribute it and/or
	4	* modify it under the terms of version 2 of the GNU General Public
	5	* License as published by the Free Software Foundation.
	6	*
	7	* This program is distributed in the hope that it will be useful, but
	8	* WITHOUT ANY WARRANTY; without even the implied warranty of
	9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	10	* General Public License for more details.
	11	*/
	12	#include <linux/bpf.h>
	13	#include <linux/syscalls.h>
	14	#include <linux/slab.h>
	15	#include <linux/anon_inodes.h>
db20fd2b	16	#include <linux/file.h>
09756af4 AS	17	#include <linux/license.h>
09756af4 AS	18	#include <linux/filter.h>
99c55f7d AS	19
	20	static LIST_HEAD(bpf_map_types);
	21
	22	static struct bpf_map find_and_alloc_map(union bpf_attr attr)
	23	{
	24	struct bpf_map_type_list *tl;
	25	struct bpf_map *map;
	26
	27	list_for_each_entry(tl, &bpf_map_types, list_node) {
	28	if (tl->type == attr->map_type) {
	29	map = tl->ops->map_alloc(attr);
	30	if (IS_ERR(map))
	31	return map;
	32	map->ops = tl->ops;
	33	map->map_type = attr->map_type;
	34	return map;
	35	}
	36	}
	37	return ERR_PTR(-EINVAL);
	38	}
	39
	40	/* boot time registration of different map implementations */
	41	void bpf_register_map_type(struct bpf_map_type_list *tl)
	42	{
	43	list_add(&tl->list_node, &bpf_map_types);
	44	}
	45
	46	/* called from workqueue */
	47	static void bpf_map_free_deferred(struct work_struct *work)
	48	{
	49	struct bpf_map *map = container_of(work, struct bpf_map, work);
	50
	51	/* implementation dependent freeing */
	52	map->ops->map_free(map);
	53	}
	54
	55	/* decrement map refcnt and schedule it for freeing via workqueue
	56	* (unrelying map implementation ops->map_free() might sleep)
	57	*/
	58	void bpf_map_put(struct bpf_map *map)
	59	{
	60	if (atomic_dec_and_test(&map->refcnt)) {
	61	INIT_WORK(&map->work, bpf_map_free_deferred);
	62	schedule_work(&map->work);
	63	}
	64	}
	65
	66	static int bpf_map_release(struct inode inode, struct file filp)
	67	{
	68	struct bpf_map *map = filp->private_data;
	69
	70	bpf_map_put(map);
	71	return 0;
	72	}
	73
	74	static const struct file_operations bpf_map_fops = {
	75	.release = bpf_map_release,
	76	};
	77
	78	/* helper macro to check that unused fields 'union bpf_attr' are zero */
	79	#define CHECK_ATTR(CMD) \
	80	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
	81	sizeof(attr->CMD##_LAST_FIELD), 0, \
	82	sizeof(*attr) - \
83	offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
84	sizeof(attr->CMD##_LAST_FIELD)) != NULL
85
86	#define BPF_MAP_CREATE_LAST_FIELD max_entries
87	/* called via syscall */
88	static int map_create(union bpf_attr *attr)
89	{
90	struct bpf_map *map;
91	int err;
92
93	err = CHECK_ATTR(BPF_MAP_CREATE);
94	if (err)
95	return -EINVAL;
96
97	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
98	map = find_and_alloc_map(attr);
99	if (IS_ERR(map))
100	return PTR_ERR(map);
101
102	atomic_set(&map->refcnt, 1);
103
104	err = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR \| O_CLOEXEC);
105
106	if (err < 0)
107	/* failed to allocate fd */
108	goto free_map;
109
110	return err;
111
112	free_map:
113	map->ops->map_free(map);
114	return err;
115	}
116
db20fd2b AS	117	/* if error is returned, fd is released.
	118	* On success caller should complete fd access with matching fdput()
	119	*/
	120	struct bpf_map *bpf_map_get(struct fd f)
	121	{
	122	struct bpf_map *map;
	123
	124	if (!f.file)
	125	return ERR_PTR(-EBADF);
	126
	127	if (f.file->f_op != &bpf_map_fops) {
	128	fdput(f);
	129	return ERR_PTR(-EINVAL);
	130	}
	131
	132	map = f.file->private_data;
	133
	134	return map;
	135	}
	136
	137	/* helper to convert user pointers passed inside __aligned_u64 fields */
	138	static void __user *u64_to_ptr(__u64 val)
	139	{
	140	return (void __user *) (unsigned long) val;
	141	}
	142
	143	/* last field in 'union bpf_attr' used by this command */
	144	#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
	145
	146	static int map_lookup_elem(union bpf_attr *attr)
	147	{
	148	void __user *ukey = u64_to_ptr(attr->key);
	149	void __user *uvalue = u64_to_ptr(attr->value);
	150	int ufd = attr->map_fd;
	151	struct fd f = fdget(ufd);
	152	struct bpf_map *map;
8ebe667c	153	void key, value, *ptr;
db20fd2b AS	154	int err;
	155
	156	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
	157	return -EINVAL;
	158
	159	map = bpf_map_get(f);
	160	if (IS_ERR(map))
	161	return PTR_ERR(map);
	162
	163	err = -ENOMEM;
	164	key = kmalloc(map->key_size, GFP_USER);
	165	if (!key)
	166	goto err_put;
	167
	168	err = -EFAULT;
	169	if (copy_from_user(key, ukey, map->key_size) != 0)
	170	goto free_key;
	171
8ebe667c AS	172	err = -ENOMEM;
8ebe667c AS	173	value = kmalloc(map->value_size, GFP_USER);
db20fd2b	174	if (!value)
8ebe667c AS	175	goto free_key;
	176
	177	rcu_read_lock();
	178	ptr = map->ops->map_lookup_elem(map, key);
	179	if (ptr)
	180	memcpy(value, ptr, map->value_size);
	181	rcu_read_unlock();
	182
	183	err = -ENOENT;
	184	if (!ptr)
	185	goto free_value;
db20fd2b AS	186
	187	err = -EFAULT;
	188	if (copy_to_user(uvalue, value, map->value_size) != 0)
8ebe667c	189	goto free_value;
db20fd2b AS	190
	191	err = 0;
	192
8ebe667c AS	193	free_value:
8ebe667c AS	194	kfree(value);
db20fd2b AS	195	free_key:
	196	kfree(key);
	197	err_put:
	198	fdput(f);
	199	return err;
	200	}
	201
3274f520	202	#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
db20fd2b AS	203
	204	static int map_update_elem(union bpf_attr *attr)
	205	{
	206	void __user *ukey = u64_to_ptr(attr->key);
	207	void __user *uvalue = u64_to_ptr(attr->value);
	208	int ufd = attr->map_fd;
	209	struct fd f = fdget(ufd);
	210	struct bpf_map *map;
	211	void key, value;
	212	int err;
	213
	214	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
	215	return -EINVAL;
	216
	217	map = bpf_map_get(f);
	218	if (IS_ERR(map))
	219	return PTR_ERR(map);
	220
	221	err = -ENOMEM;
	222	key = kmalloc(map->key_size, GFP_USER);
	223	if (!key)
	224	goto err_put;
	225
	226	err = -EFAULT;
	227	if (copy_from_user(key, ukey, map->key_size) != 0)
	228	goto free_key;
	229
	230	err = -ENOMEM;
	231	value = kmalloc(map->value_size, GFP_USER);
	232	if (!value)
	233	goto free_key;
	234
	235	err = -EFAULT;
	236	if (copy_from_user(value, uvalue, map->value_size) != 0)
	237	goto free_value;
	238
	239	/* eBPF program that use maps are running under rcu_read_lock(),
	240	* therefore all map accessors rely on this fact, so do the same here
	241	*/
	242	rcu_read_lock();
3274f520	243	err = map->ops->map_update_elem(map, key, value, attr->flags);
db20fd2b AS	244	rcu_read_unlock();
	245
	246	free_value:
	247	kfree(value);
	248	free_key:
	249	kfree(key);
	250	err_put:
	251	fdput(f);
	252	return err;
	253	}
	254
	255	#define BPF_MAP_DELETE_ELEM_LAST_FIELD key
	256
	257	static int map_delete_elem(union bpf_attr *attr)
	258	{
	259	void __user *ukey = u64_to_ptr(attr->key);
	260	int ufd = attr->map_fd;
	261	struct fd f = fdget(ufd);
	262	struct bpf_map *map;
	263	void *key;
	264	int err;
	265
	266	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
	267	return -EINVAL;
	268
	269	map = bpf_map_get(f);
	270	if (IS_ERR(map))
	271	return PTR_ERR(map);
	272
	273	err = -ENOMEM;
	274	key = kmalloc(map->key_size, GFP_USER);
	275	if (!key)
	276	goto err_put;
	277
	278	err = -EFAULT;
	279	if (copy_from_user(key, ukey, map->key_size) != 0)
	280	goto free_key;
	281
	282	rcu_read_lock();
	283	err = map->ops->map_delete_elem(map, key);
	284	rcu_read_unlock();
	285
	286	free_key:
	287	kfree(key);
	288	err_put:
	289	fdput(f);
	290	return err;
	291	}
	292
	293	/* last field in 'union bpf_attr' used by this command */
	294	#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
	295
	296	static int map_get_next_key(union bpf_attr *attr)
	297	{
	298	void __user *ukey = u64_to_ptr(attr->key);
	299	void __user *unext_key = u64_to_ptr(attr->next_key);
	300	int ufd = attr->map_fd;
	301	struct fd f = fdget(ufd);
	302	struct bpf_map *map;
	303	void key, next_key;
	304	int err;
	305
	306	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
	307	return -EINVAL;
308
309	map = bpf_map_get(f);
310	if (IS_ERR(map))
311	return PTR_ERR(map);
312
313	err = -ENOMEM;
314	key = kmalloc(map->key_size, GFP_USER);
315	if (!key)
316	goto err_put;
317
318	err = -EFAULT;
319	if (copy_from_user(key, ukey, map->key_size) != 0)
320	goto free_key;
321
322	err = -ENOMEM;
323	next_key = kmalloc(map->key_size, GFP_USER);
324	if (!next_key)
325	goto free_key;
326
327	rcu_read_lock();
328	err = map->ops->map_get_next_key(map, key, next_key);
329	rcu_read_unlock();
330	if (err)
331	goto free_next_key;
332
333	err = -EFAULT;
334	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
335	goto free_next_key;
336
337	err = 0;
338
339	free_next_key:
340	kfree(next_key);
341	free_key:
342	kfree(key);
343	err_put:
344	fdput(f);
345	return err;
346	}
347
09756af4 AS	348	static LIST_HEAD(bpf_prog_types);
	349
	350	static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
	351	{
	352	struct bpf_prog_type_list *tl;
	353
	354	list_for_each_entry(tl, &bpf_prog_types, list_node) {
	355	if (tl->type == type) {
	356	prog->aux->ops = tl->ops;
24701ece	357	prog->type = type;
09756af4 AS	358	return 0;
	359	}
	360	}
24701ece	361
09756af4 AS	362	return -EINVAL;
	363	}
	364
	365	void bpf_register_prog_type(struct bpf_prog_type_list *tl)
	366	{
	367	list_add(&tl->list_node, &bpf_prog_types);
	368	}
	369
0a542a86 AS	370	/* fixup insn->imm field of bpf_call instructions:
	371	* if (insn->imm == BPF_FUNC_map_lookup_elem)
	372	* insn->imm = bpf_map_lookup_elem - __bpf_call_base;
	373	* else if (insn->imm == BPF_FUNC_map_update_elem)
	374	* insn->imm = bpf_map_update_elem - __bpf_call_base;
	375	* else ...
	376	*
	377	* this function is called after eBPF program passed verification
	378	*/
	379	static void fixup_bpf_calls(struct bpf_prog *prog)
	380	{
	381	const struct bpf_func_proto *fn;
	382	int i;
	383
	384	for (i = 0; i < prog->len; i++) {
	385	struct bpf_insn *insn = &prog->insnsi[i];
	386
	387	if (insn->code == (BPF_JMP \| BPF_CALL)) {
	388	/* we reach here when program has bpf_call instructions
	389	* and it passed bpf_check(), means that
	390	* ops->get_func_proto must have been supplied, check it
	391	*/
	392	BUG_ON(!prog->aux->ops->get_func_proto);
	393
	394	fn = prog->aux->ops->get_func_proto(insn->imm);
	395	/* all functions that have prototype and verifier allowed
	396	* programs to call them, must be real in-kernel functions
	397	*/
	398	BUG_ON(!fn->func);
	399	insn->imm = fn->func - __bpf_call_base;
	400	}
	401	}
	402	}
	403
09756af4 AS	404	/* drop refcnt on maps used by eBPF program and free auxilary data */
	405	static void free_used_maps(struct bpf_prog_aux *aux)
	406	{
	407	int i;
	408
	409	for (i = 0; i < aux->used_map_cnt; i++)
	410	bpf_map_put(aux->used_maps[i]);
	411
	412	kfree(aux->used_maps);
	413	}
	414
	415	void bpf_prog_put(struct bpf_prog *prog)
	416	{
	417	if (atomic_dec_and_test(&prog->aux->refcnt)) {
	418	free_used_maps(prog->aux);
	419	bpf_prog_free(prog);
	420	}
	421	}
e2e9b654	422	EXPORT_SYMBOL_GPL(bpf_prog_put);
09756af4 AS	423
	424	static int bpf_prog_release(struct inode inode, struct file filp)
	425	{
	426	struct bpf_prog *prog = filp->private_data;
	427
	428	bpf_prog_put(prog);
	429	return 0;
	430	}
	431
	432	static const struct file_operations bpf_prog_fops = {
	433	.release = bpf_prog_release,
	434	};
	435
	436	static struct bpf_prog *get_prog(struct fd f)
	437	{
	438	struct bpf_prog *prog;
	439
	440	if (!f.file)
	441	return ERR_PTR(-EBADF);
	442
	443	if (f.file->f_op != &bpf_prog_fops) {
	444	fdput(f);
	445	return ERR_PTR(-EINVAL);
	446	}
	447
	448	prog = f.file->private_data;
	449
	450	return prog;
	451	}
	452
	453	/* called by sockets/tracing/seccomp before attaching program to an event
	454	* pairs with bpf_prog_put()
	455	*/
	456	struct bpf_prog *bpf_prog_get(u32 ufd)
	457	{
	458	struct fd f = fdget(ufd);
	459	struct bpf_prog *prog;
	460
	461	prog = get_prog(f);
	462
	463	if (IS_ERR(prog))
	464	return prog;
	465
	466	atomic_inc(&prog->aux->refcnt);
	467	fdput(f);
	468	return prog;
	469	}
e2e9b654	470	EXPORT_SYMBOL_GPL(bpf_prog_get);
09756af4 AS	471
09756af4 AS	472	/* last field in 'union bpf_attr' used by this command */
cbd35700	473	#define BPF_PROG_LOAD_LAST_FIELD log_buf
09756af4 AS	474
	475	static int bpf_prog_load(union bpf_attr *attr)
	476	{
	477	enum bpf_prog_type type = attr->prog_type;
	478	struct bpf_prog *prog;
	479	int err;
	480	char license[128];
	481	bool is_gpl;
	482
	483	if (CHECK_ATTR(BPF_PROG_LOAD))
	484	return -EINVAL;
	485
	486	/* copy eBPF program license from user space */
	487	if (strncpy_from_user(license, u64_to_ptr(attr->license),
	488	sizeof(license) - 1) < 0)
	489	return -EFAULT;
	490	license[sizeof(license) - 1] = 0;
	491
	492	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	493	is_gpl = license_is_gpl_compatible(license);
	494
	495	if (attr->insn_cnt >= BPF_MAXINSNS)
	496	return -EINVAL;
	497
	498	/* plain bpf_prog allocation */
	499	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	500	if (!prog)
	501	return -ENOMEM;
	502
	503	prog->len = attr->insn_cnt;
	504
	505	err = -EFAULT;
	506	if (copy_from_user(prog->insns, u64_to_ptr(attr->insns),
	507	prog->len * sizeof(struct bpf_insn)) != 0)
	508	goto free_prog;
	509
	510	prog->orig_prog = NULL;
	511	prog->jited = false;
	512
	513	atomic_set(&prog->aux->refcnt, 1);
24701ece	514	prog->gpl_compatible = is_gpl;
09756af4 AS	515
	516	/* find program type: socket_filter vs tracing_filter */
	517	err = find_prog_type(type, prog);
	518	if (err < 0)
	519	goto free_prog;
	520
	521	/* run eBPF verifier */
51580e79	522	err = bpf_check(prog, attr);
09756af4 AS	523	if (err < 0)
	524	goto free_used_maps;
	525
0a542a86 AS	526	/* fixup BPF_CALL->imm field */
	527	fixup_bpf_calls(prog);
	528
09756af4 AS	529	/* eBPF program is ready to be JITed */
	530	bpf_prog_select_runtime(prog);
	531
	532	err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR \| O_CLOEXEC);
09756af4 AS	533	if (err < 0)
	534	/* failed to allocate fd */
	535	goto free_used_maps;
	536
	537	return err;
	538
	539	free_used_maps:
	540	free_used_maps(prog->aux);
	541	free_prog:
	542	bpf_prog_free(prog);
	543	return err;
	544	}
	545
99c55f7d AS	546	SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
	547	{
	548	union bpf_attr attr = {};
	549	int err;
	550
	551	/* the syscall is limited to root temporarily. This restriction will be
	552	* lifted when security audit is clean. Note that eBPF+tracing must have
	553	* this restriction, since it may pass kernel data to user space
	554	*/
	555	if (!capable(CAP_SYS_ADMIN))
	556	return -EPERM;
	557
	558	if (!access_ok(VERIFY_READ, uattr, 1))
	559	return -EFAULT;
	560
	561	if (size > PAGE_SIZE) /* silly large */
	562	return -E2BIG;
	563
	564	/* If we're handed a bigger struct than we know of,
	565	* ensure all the unknown bits are 0 - i.e. new
	566	* user-space does not rely on any kernel feature
	567	* extensions we dont know about yet.
	568	*/
	569	if (size > sizeof(attr)) {
	570	unsigned char __user *addr;
	571	unsigned char __user *end;
	572	unsigned char val;
	573
	574	addr = (void __user *)uattr + sizeof(attr);
	575	end = (void __user *)uattr + size;
	576
	577	for (; addr < end; addr++) {
	578	err = get_user(val, addr);
	579	if (err)
	580	return err;
	581	if (val)
	582	return -E2BIG;
	583	}
	584	size = sizeof(attr);
	585	}
	586
	587	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	588	if (copy_from_user(&attr, uattr, size) != 0)
	589	return -EFAULT;
	590
	591	switch (cmd) {
	592	case BPF_MAP_CREATE:
	593	err = map_create(&attr);
	594	break;
db20fd2b AS	595	case BPF_MAP_LOOKUP_ELEM:
	596	err = map_lookup_elem(&attr);
	597	break;
	598	case BPF_MAP_UPDATE_ELEM:
	599	err = map_update_elem(&attr);
	600	break;
	601	case BPF_MAP_DELETE_ELEM:
	602	err = map_delete_elem(&attr);
	603	break;
	604	case BPF_MAP_GET_NEXT_KEY:
	605	err = map_get_next_key(&attr);
	606	break;
09756af4 AS	607	case BPF_PROG_LOAD:
	608	err = bpf_prog_load(&attr);
	609	break;
99c55f7d AS	610	default:
	611	err = -EINVAL;
	612	break;
	613	}
	614
	615	return err;
	616	}