]> git.ipfire.org Git - people/arne_f/kernel.git/blame - net/socket.c
net: disable netpoll on fresh napis
[people/arne_f/kernel.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
45c8178c 92#include <linux/nospec.h>
1da177e4 93
7c0f6ba6 94#include <linux/uaccess.h>
1da177e4
LT
95#include <asm/unistd.h>
96
97#include <net/compat.h>
87de87d5 98#include <net/wext.h>
f8451725 99#include <net/cls_cgroup.h>
1da177e4
LT
100
101#include <net/sock.h>
102#include <linux/netfilter.h>
103
6b96018b
AB
104#include <linux/if_tun.h>
105#include <linux/ipv6_route.h>
106#include <linux/route.h>
6b96018b
AB
107#include <linux/sockios.h>
108#include <linux/atalk.h>
076bb0c8 109#include <net/busy_poll.h>
f24b9be5 110#include <linux/errqueue.h>
06021292 111
/* These two tunables are only built when CONFIG_NET_RX_BUSY_POLL is set. */
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
#endif
6b96018b 116
8ae5e030
AV
117static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
118static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 119static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
120
121static int sock_close(struct inode *inode, struct file *file);
122static unsigned int sock_poll(struct file *file,
123 struct poll_table_struct *wait);
89bddce5 124static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
125#ifdef CONFIG_COMPAT
126static long compat_sock_ioctl(struct file *file,
89bddce5 127 unsigned int cmd, unsigned long arg);
89bbfc95 128#endif
1da177e4 129static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
130static ssize_t sock_sendpage(struct file *file, struct page *page,
131 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 132static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 133 struct pipe_inode_info *pipe, size_t len,
9c55e01c 134 unsigned int flags);
1da177e4 135
1da177e4
LT
136/*
137 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
138 * in the operation structures but are done directly via the socketcall() multiplexor.
139 */
140
da7071d7 141static const struct file_operations socket_file_ops = {
1da177e4
LT
142 .owner = THIS_MODULE,
143 .llseek = no_llseek,
8ae5e030
AV
144 .read_iter = sock_read_iter,
145 .write_iter = sock_write_iter,
1da177e4
LT
146 .poll = sock_poll,
147 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
148#ifdef CONFIG_COMPAT
149 .compat_ioctl = compat_sock_ioctl,
150#endif
1da177e4 151 .mmap = sock_mmap,
1da177e4
LT
152 .release = sock_close,
153 .fasync = sock_fasync,
5274f052
JA
154 .sendpage = sock_sendpage,
155 .splice_write = generic_splice_sendpage,
9c55e01c 156 .splice_read = sock_splice_read,
1da177e4
LT
157};
158
159/*
160 * The protocol list. Each protocol is registered in here.
161 */
162
1da177e4 163static DEFINE_SPINLOCK(net_family_lock);
190683a9 164static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 165
1da177e4
LT
166/*
167 * Statistics counters of the socket lists
168 */
169
c6d409cf 170static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
171
172/*
89bddce5
SH
173 * Support routines.
174 * Move socket addresses back and forth across the kernel/user
175 * divide and look after the messy bits.
1da177e4
LT
176 */
177
1da177e4
LT
178/**
179 * move_addr_to_kernel - copy a socket address into kernel space
180 * @uaddr: Address in user space
181 * @kaddr: Address in kernel space
182 * @ulen: Length in user space
183 *
184 * The address is copied into kernel space. If the provided address is
185 * too long an error code of -EINVAL is returned. If the copy gives
186 * invalid addresses -EFAULT is returned. On a success 0 is returned.
187 */
188
43db362d 189int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 190{
230b1839 191 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 192 return -EINVAL;
89bddce5 193 if (ulen == 0)
1da177e4 194 return 0;
89bddce5 195 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 196 return -EFAULT;
3ec3b2fb 197 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
198}
199
200/**
201 * move_addr_to_user - copy an address to user space
202 * @kaddr: kernel space address
203 * @klen: length of address in kernel
204 * @uaddr: user space address
205 * @ulen: pointer to user length field
206 *
207 * The value pointed to by ulen on entry is the buffer length available.
208 * This is overwritten with the buffer space used. -EINVAL is returned
209 * if an overlong buffer is specified or a negative buffer size. -EFAULT
210 * is returned if either the buffer or the length field are not
211 * accessible.
212 * After copying the data up to the limit the user specifies, the true
213 * length of the data is written over the length limit the user
214 * specified. Zero is returned for a success.
215 */
89bddce5 216
43db362d 217static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 218 void __user *uaddr, int __user *ulen)
1da177e4
LT
219{
220 int err;
221 int len;
222
68c6beb3 223 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
224 err = get_user(len, ulen);
225 if (err)
1da177e4 226 return err;
89bddce5
SH
227 if (len > klen)
228 len = klen;
68c6beb3 229 if (len < 0)
1da177e4 230 return -EINVAL;
89bddce5 231 if (len) {
d6fe3945
SG
232 if (audit_sockaddr(klen, kaddr))
233 return -ENOMEM;
89bddce5 234 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
235 return -EFAULT;
236 }
237 /*
89bddce5
SH
238 * "fromlen shall refer to the value before truncation.."
239 * 1003.1g
1da177e4
LT
240 */
241 return __put_user(klen, ulen);
242}
243
e18b890b 244static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
245
246static struct inode *sock_alloc_inode(struct super_block *sb)
247{
248 struct socket_alloc *ei;
eaefd110 249 struct socket_wq *wq;
89bddce5 250
e94b1766 251 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
252 if (!ei)
253 return NULL;
eaefd110
ED
254 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
255 if (!wq) {
43815482
ED
256 kmem_cache_free(sock_inode_cachep, ei);
257 return NULL;
258 }
eaefd110
ED
259 init_waitqueue_head(&wq->wait);
260 wq->fasync_list = NULL;
574aab1e 261 wq->flags = 0;
eaefd110 262 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 263
1da177e4
LT
264 ei->socket.state = SS_UNCONNECTED;
265 ei->socket.flags = 0;
266 ei->socket.ops = NULL;
267 ei->socket.sk = NULL;
268 ei->socket.file = NULL;
1da177e4
LT
269
270 return &ei->vfs_inode;
271}
272
273static void sock_destroy_inode(struct inode *inode)
274{
43815482 275 struct socket_alloc *ei;
eaefd110 276 struct socket_wq *wq;
43815482
ED
277
278 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 279 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 280 kfree_rcu(wq, rcu);
43815482 281 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
282}
283
51cc5068 284static void init_once(void *foo)
1da177e4 285{
89bddce5 286 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 287
a35afb83 288 inode_init_once(&ei->vfs_inode);
1da177e4 289}
89bddce5 290
1e911632 291static void init_inodecache(void)
1da177e4
LT
292{
293 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
294 sizeof(struct socket_alloc),
295 0,
296 (SLAB_HWCACHE_ALIGN |
297 SLAB_RECLAIM_ACCOUNT |
5d097056 298 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 299 init_once);
1e911632 300 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
301}
302
b87221de 303static const struct super_operations sockfs_ops = {
c6d409cf
ED
304 .alloc_inode = sock_alloc_inode,
305 .destroy_inode = sock_destroy_inode,
306 .statfs = simple_statfs,
1da177e4
LT
307};
308
c23fbb6b
ED
309/*
310 * sockfs_dname() is called from d_path().
311 */
312static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
313{
314 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 315 d_inode(dentry)->i_ino);
c23fbb6b
ED
316}
317
3ba13d17 318static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 319 .d_dname = sockfs_dname,
1da177e4
LT
320};
321
bba0bd31
AG
322static int sockfs_xattr_get(const struct xattr_handler *handler,
323 struct dentry *dentry, struct inode *inode,
324 const char *suffix, void *value, size_t size)
325{
326 if (value) {
327 if (dentry->d_name.len + 1 > size)
328 return -ERANGE;
329 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
330 }
331 return dentry->d_name.len + 1;
332}
333
334#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
335#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
336#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
337
338static const struct xattr_handler sockfs_xattr_handler = {
339 .name = XATTR_NAME_SOCKPROTONAME,
340 .get = sockfs_xattr_get,
341};
342
4a590153
AG
343static int sockfs_security_xattr_set(const struct xattr_handler *handler,
344 struct dentry *dentry, struct inode *inode,
345 const char *suffix, const void *value,
346 size_t size, int flags)
347{
348 /* Handled by LSM. */
349 return -EAGAIN;
350}
351
352static const struct xattr_handler sockfs_security_xattr_handler = {
353 .prefix = XATTR_SECURITY_PREFIX,
354 .set = sockfs_security_xattr_set,
355};
356
bba0bd31
AG
357static const struct xattr_handler *sockfs_xattr_handlers[] = {
358 &sockfs_xattr_handler,
4a590153 359 &sockfs_security_xattr_handler,
bba0bd31
AG
360 NULL
361};
362
c74a1cbb
AV
363static struct dentry *sockfs_mount(struct file_system_type *fs_type,
364 int flags, const char *dev_name, void *data)
365{
bba0bd31
AG
366 return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
367 sockfs_xattr_handlers,
368 &sockfs_dentry_operations, SOCKFS_MAGIC);
c74a1cbb
AV
369}
370
371static struct vfsmount *sock_mnt __read_mostly;
372
373static struct file_system_type sock_fs_type = {
374 .name = "sockfs",
375 .mount = sockfs_mount,
376 .kill_sb = kill_anon_super,
377};
378
1da177e4
LT
379/*
380 * Obtains the first available file descriptor and sets it up for use.
381 *
39d8c1b6
DM
382 * These functions create file structures and maps them to fd space
383 * of the current process. On success it returns file descriptor
1da177e4
LT
384 * and file struct implicitly stored in sock->file.
385 * Note that another thread may close file descriptor before we return
386 * from this function. We use the fact that now we do not refer
387 * to socket after mapping. If one day we will need it, this
388 * function will increment ref. count on file by 1.
389 *
390 * In any case returned fd MAY BE not valid!
391 * This race condition is unavoidable
392 * with shared fd spaces, we cannot solve it inside kernel,
393 * but we take care of internal coherence yet.
394 */
395
aab174f0 396struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 397{
7cbe66b6 398 struct qstr name = { .name = "" };
2c48b9c4 399 struct path path;
7cbe66b6 400 struct file *file;
1da177e4 401
600e1779
MY
402 if (dname) {
403 name.name = dname;
404 name.len = strlen(name.name);
405 } else if (sock->sk) {
406 name.name = sock->sk->sk_prot_creator->name;
407 name.len = strlen(name.name);
408 }
4b936885 409 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
410 if (unlikely(!path.dentry))
411 return ERR_PTR(-ENOMEM);
2c48b9c4 412 path.mnt = mntget(sock_mnt);
39d8c1b6 413
2c48b9c4 414 d_instantiate(path.dentry, SOCK_INODE(sock));
39d8c1b6 415
2c48b9c4 416 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 417 &socket_file_ops);
b5ffe634 418 if (IS_ERR(file)) {
cc3808f8 419 /* drop dentry, keep inode */
c5ef6035 420 ihold(d_inode(path.dentry));
2c48b9c4 421 path_put(&path);
39b65252 422 return file;
cc3808f8
AV
423 }
424
425 sock->file = file;
77d27200 426 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 427 file->private_data = sock;
28407630 428 return file;
39d8c1b6 429}
56b31d1c 430EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 431
56b31d1c 432static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
433{
434 struct file *newfile;
28407630
AV
435 int fd = get_unused_fd_flags(flags);
436 if (unlikely(fd < 0))
437 return fd;
39d8c1b6 438
aab174f0 439 newfile = sock_alloc_file(sock, flags, NULL);
28407630 440 if (likely(!IS_ERR(newfile))) {
39d8c1b6 441 fd_install(fd, newfile);
28407630
AV
442 return fd;
443 }
7cbe66b6 444
28407630
AV
445 put_unused_fd(fd);
446 return PTR_ERR(newfile);
1da177e4
LT
447}
448
406a3c63 449struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 450{
6cb153ca
BL
451 if (file->f_op == &socket_file_ops)
452 return file->private_data; /* set in sock_map_fd */
453
23bb80d2
ED
454 *err = -ENOTSOCK;
455 return NULL;
6cb153ca 456}
406a3c63 457EXPORT_SYMBOL(sock_from_file);
6cb153ca 458
1da177e4 459/**
c6d409cf 460 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
461 * @fd: file handle
462 * @err: pointer to an error code return
463 *
464 * The file handle passed in is locked and the socket it is bound
241c4667 465 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
466 * with a negative errno code and NULL is returned. The function checks
467 * for both invalid handles and passing a handle which is not a socket.
468 *
469 * On a success the socket object pointer is returned.
470 */
471
472struct socket *sockfd_lookup(int fd, int *err)
473{
474 struct file *file;
1da177e4
LT
475 struct socket *sock;
476
89bddce5
SH
477 file = fget(fd);
478 if (!file) {
1da177e4
LT
479 *err = -EBADF;
480 return NULL;
481 }
89bddce5 482
6cb153ca
BL
483 sock = sock_from_file(file, err);
484 if (!sock)
1da177e4 485 fput(file);
6cb153ca
BL
486 return sock;
487}
c6d409cf 488EXPORT_SYMBOL(sockfd_lookup);
1da177e4 489
6cb153ca
BL
490static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
491{
00e188ef 492 struct fd f = fdget(fd);
6cb153ca
BL
493 struct socket *sock;
494
3672558c 495 *err = -EBADF;
00e188ef
AV
496 if (f.file) {
497 sock = sock_from_file(f.file, err);
498 if (likely(sock)) {
bb70a1cb 499 *fput_needed = f.flags & FDPUT_FPUT;
6cb153ca 500 return sock;
00e188ef
AV
501 }
502 fdput(f);
1da177e4 503 }
6cb153ca 504 return NULL;
1da177e4
LT
505}
506
600e1779
MY
507static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
508 size_t size)
509{
510 ssize_t len;
511 ssize_t used = 0;
512
c5ef6035 513 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
514 if (len < 0)
515 return len;
516 used += len;
517 if (buffer) {
518 if (size < used)
519 return -ERANGE;
520 buffer += len;
521 }
522
523 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
524 used += len;
525 if (buffer) {
526 if (size < used)
527 return -ERANGE;
528 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
529 buffer += len;
530 }
531
532 return used;
533}
534
dc647ec8 535static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
536{
537 int err = simple_setattr(dentry, iattr);
538
e1a3a60a 539 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
540 struct socket *sock = SOCKET_I(d_inode(dentry));
541
91717ffc
CW
542 if (sock->sk)
543 sock->sk->sk_uid = iattr->ia_uid;
544 else
545 err = -ENOENT;
86741ec2
LC
546 }
547
548 return err;
549}
550
600e1779 551static const struct inode_operations sockfs_inode_ops = {
600e1779 552 .listxattr = sockfs_listxattr,
86741ec2 553 .setattr = sockfs_setattr,
600e1779
MY
554};
555
1da177e4
LT
556/**
557 * sock_alloc - allocate a socket
89bddce5 558 *
1da177e4
LT
559 * Allocate a new inode and socket object. The two are bound together
560 * and initialised. The socket is then returned. If we are out of inodes
561 * NULL is returned.
562 */
563
f4a00aac 564struct socket *sock_alloc(void)
1da177e4 565{
89bddce5
SH
566 struct inode *inode;
567 struct socket *sock;
1da177e4 568
a209dfc7 569 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
570 if (!inode)
571 return NULL;
572
573 sock = SOCKET_I(inode);
574
85fe4025 575 inode->i_ino = get_next_ino();
89bddce5 576 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
577 inode->i_uid = current_fsuid();
578 inode->i_gid = current_fsgid();
600e1779 579 inode->i_op = &sockfs_inode_ops;
1da177e4 580
19e8d69c 581 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
582 return sock;
583}
f4a00aac 584EXPORT_SYMBOL(sock_alloc);
1da177e4 585
1da177e4
LT
586/**
587 * sock_release - close a socket
588 * @sock: socket to close
589 *
590 * The socket is released from the protocol stack if it has a release
591 * callback, and the inode is then released if the socket is bound to
89bddce5 592 * an inode not a file.
1da177e4 593 */
89bddce5 594
91717ffc 595static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
596{
597 if (sock->ops) {
598 struct module *owner = sock->ops->owner;
599
91717ffc
CW
600 if (inode)
601 inode_lock(inode);
1da177e4 602 sock->ops->release(sock);
e5e8350d 603 sock->sk = NULL;
91717ffc
CW
604 if (inode)
605 inode_unlock(inode);
1da177e4
LT
606 sock->ops = NULL;
607 module_put(owner);
608 }
609
eaefd110 610 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 611 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 612
19e8d69c 613 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
614 if (!sock->file) {
615 iput(SOCK_INODE(sock));
616 return;
617 }
89bddce5 618 sock->file = NULL;
1da177e4 619}
91717ffc
CW
620
621void sock_release(struct socket *sock)
622{
623 __sock_release(sock, NULL);
624}
c6d409cf 625EXPORT_SYMBOL(sock_release);
1da177e4 626
c14ac945 627void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 628{
140c55d4
ED
629 u8 flags = *tx_flags;
630
c14ac945 631 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
632 flags |= SKBTX_HW_TSTAMP;
633
c14ac945 634 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
635 flags |= SKBTX_SW_TSTAMP;
636
c14ac945 637 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
638 flags |= SKBTX_SCHED_TSTAMP;
639
140c55d4 640 *tx_flags = flags;
20d49473 641}
67cc0d40 642EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 643
d8725c86 644static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 645{
01e97e65 646 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
647 BUG_ON(ret == -EIOCBQUEUED);
648 return ret;
1da177e4
LT
649}
650
/*
 * Send @msg on @sock after consulting security_socket_sendmsg(). A non-zero
 * result from the security hook is returned as is and nothing is sent.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
659
660int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
661 struct kvec *vec, size_t num, size_t size)
662{
6aa24814 663 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
d8725c86 664 return sock_sendmsg(sock, msg);
1da177e4 665}
c6d409cf 666EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 667
306b13eb
TH
668int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
669 struct kvec *vec, size_t num, size_t size)
670{
671 struct socket *sock = sk->sk_socket;
672
673 if (!sock->ops->sendmsg_locked)
db5980d8 674 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb
TH
675
676 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
677
678 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
679}
680EXPORT_SYMBOL(kernel_sendmsg_locked);
681
8605330a
SHY
682static bool skb_is_err_queue(const struct sk_buff *skb)
683{
684 /* pkt_type of skbs enqueued on the error queue are set to
685 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
686 * in recvmsg, since skbs received on a local socket will never
687 * have a pkt_type of PACKET_OUTGOING.
688 */
689 return skb->pkt_type == PACKET_OUTGOING;
690}
691
b50a5c70
ML
692/* On transmit, software and hardware timestamps are returned independently.
693 * As the two skb clones share the hardware timestamp, which may be updated
694 * before the software timestamp is received, a hardware TX timestamp may be
695 * returned only if there is no software TX timestamp. Ignore false software
696 * timestamps, which may be made in the __sock_recv_timestamp() call when the
697 * option SO_TIMESTAMP(NS) is enabled on the socket, even when the skb has a
698 * hardware timestamp.
699 */
700static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
701{
702 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
703}
704
aad9c8c4
ML
705static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
706{
707 struct scm_ts_pktinfo ts_pktinfo;
708 struct net_device *orig_dev;
709
710 if (!skb_mac_header_was_set(skb))
711 return;
712
713 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
714
715 rcu_read_lock();
716 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
717 if (orig_dev)
718 ts_pktinfo.if_index = orig_dev->ifindex;
719 rcu_read_unlock();
720
721 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
722 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
723 sizeof(ts_pktinfo), &ts_pktinfo);
724}
725
92f37fd2
ED
726/*
727 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
728 */
729void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
730 struct sk_buff *skb)
731{
20d49473 732 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
f24b9be5 733 struct scm_timestamping tss;
b50a5c70 734 int empty = 1, false_tstamp = 0;
20d49473
PO
735 struct skb_shared_hwtstamps *shhwtstamps =
736 skb_hwtstamps(skb);
737
738 /* Race occurred between timestamp enabling and packet
739 receiving. Fill in the current time for now. */
b50a5c70 740 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 741 __net_timestamp(skb);
b50a5c70
ML
742 false_tstamp = 1;
743 }
20d49473
PO
744
745 if (need_software_tstamp) {
746 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
747 struct timeval tv;
748 skb_get_timestamp(skb, &tv);
749 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
750 sizeof(tv), &tv);
751 } else {
f24b9be5
WB
752 struct timespec ts;
753 skb_get_timestampns(skb, &ts);
20d49473 754 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
f24b9be5 755 sizeof(ts), &ts);
20d49473
PO
756 }
757 }
758
f24b9be5 759 memset(&tss, 0, sizeof(tss));
c199105d 760 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 761 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 762 empty = 0;
4d276eb6 763 if (shhwtstamps &&
b9f40e21 764 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 765 !skb_is_swtx_tstamp(skb, false_tstamp) &&
aad9c8c4 766 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 767 empty = 0;
aad9c8c4
ML
768 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
769 !skb_is_err_queue(skb))
770 put_ts_pktinfo(msg, skb);
771 }
1c885808 772 if (!empty) {
20d49473 773 put_cmsg(msg, SOL_SOCKET,
f24b9be5 774 SCM_TIMESTAMPING, sizeof(tss), &tss);
1c885808 775
8605330a 776 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 777 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
778 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
779 skb->len, skb->data);
780 }
92f37fd2 781}
7c81fd8b
ACM
782EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
783
6e3e939f
JB
784void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
785 struct sk_buff *skb)
786{
787 int ack;
788
789 if (!sock_flag(sk, SOCK_WIFI_STATUS))
790 return;
791 if (!skb->wifi_acked_valid)
792 return;
793
794 ack = skb->wifi_acked;
795
796 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
797}
798EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
799
11165f14 800static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
801 struct sk_buff *skb)
3b885787 802{
744d5a3e 803 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 804 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 805 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
806}
807
/*
 * Add both the timestamp and the drop-count control messages for @skb to
 * @msg, in that order.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 815
1b784140 816static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 817 int flags)
1da177e4 818{
2da62906 819 return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);
1da177e4
LT
820}
821
/*
 * Receive into @msg from @sock after consulting security_socket_recvmsg().
 * A non-zero result from the security hook is returned as is and the
 * protocol is not called.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 829
c1249c0a
ML
830/**
831 * kernel_recvmsg - Receive a message from a socket (kernel space)
832 * @sock: The socket to receive the message from
833 * @msg: Received message
834 * @vec: Input s/g array for message data
835 * @num: Size of input s/g array
836 * @size: Number of bytes to read
837 * @flags: Message flags (MSG_DONTWAIT, etc...)
838 *
839 * On return the msg structure contains the scatter/gather array passed in the
840 * vec argument. The array is modified so that it consists of the unfilled
841 * portion of the original array.
842 *
843 * The returned value is the total number of bytes received, or an error.
844 */
89bddce5
SH
845int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
846 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
847{
848 mm_segment_t oldfs = get_fs();
849 int result;
850
6aa24814 851 iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
1da177e4 852 set_fs(KERNEL_DS);
2da62906 853 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
854 set_fs(oldfs);
855 return result;
856}
c6d409cf 857EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 858
ce1d4d3e
CH
859static ssize_t sock_sendpage(struct file *file, struct page *page,
860 int offset, size_t size, loff_t *ppos, int more)
1da177e4 861{
1da177e4
LT
862 struct socket *sock;
863 int flags;
864
ce1d4d3e
CH
865 sock = file->private_data;
866
35f9c09f
ED
867 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
868 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
869 flags |= more;
ce1d4d3e 870
e6949583 871 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 872}
1da177e4 873
9c55e01c 874static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 875 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
876 unsigned int flags)
877{
878 struct socket *sock = file->private_data;
879
997b37da
RDC
880 if (unlikely(!sock->ops->splice_read))
881 return -EINVAL;
882
9c55e01c
JA
883 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
884}
885
8ae5e030 886static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 887{
6d652330
AV
888 struct file *file = iocb->ki_filp;
889 struct socket *sock = file->private_data;
0345f931 890 struct msghdr msg = {.msg_iter = *to,
891 .msg_iocb = iocb};
8ae5e030 892 ssize_t res;
ce1d4d3e 893
f42504ab 894 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
895 msg.msg_flags = MSG_DONTWAIT;
896
897 if (iocb->ki_pos != 0)
1da177e4 898 return -ESPIPE;
027445c3 899
66ee59af 900 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
901 return 0;
902
2da62906 903 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
904 *to = msg.msg_iter;
905 return res;
1da177e4
LT
906}
907
8ae5e030 908static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 909{
6d652330
AV
910 struct file *file = iocb->ki_filp;
911 struct socket *sock = file->private_data;
0345f931 912 struct msghdr msg = {.msg_iter = *from,
913 .msg_iocb = iocb};
8ae5e030 914 ssize_t res;
1da177e4 915
8ae5e030 916 if (iocb->ki_pos != 0)
ce1d4d3e 917 return -ESPIPE;
027445c3 918
f42504ab 919 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
920 msg.msg_flags = MSG_DONTWAIT;
921
6d652330
AV
922 if (sock->type == SOCK_SEQPACKET)
923 msg.msg_flags |= MSG_EOR;
924
d8725c86 925 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
926 *from = msg.msg_iter;
927 return res;
1da177e4
LT
928}
929
1da177e4
LT
930/*
931 * Atomic setting of ioctl hooks to avoid race
932 * with module unload.
933 */
934
4a3e2f71 935static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 936static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 937
881d966b 938void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 939{
4a3e2f71 940 mutex_lock(&br_ioctl_mutex);
1da177e4 941 br_ioctl_hook = hook;
4a3e2f71 942 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
943}
944EXPORT_SYMBOL(brioctl_set);
945
4a3e2f71 946static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 947static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 948
881d966b 949void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 950{
4a3e2f71 951 mutex_lock(&vlan_ioctl_mutex);
1da177e4 952 vlan_ioctl_hook = hook;
4a3e2f71 953 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
954}
955EXPORT_SYMBOL(vlan_ioctl_set);
956
4a3e2f71 957static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 958static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 959
89bddce5 960void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 961{
4a3e2f71 962 mutex_lock(&dlci_ioctl_mutex);
1da177e4 963 dlci_ioctl_hook = hook;
4a3e2f71 964 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
965}
966EXPORT_SYMBOL(dlci_ioctl_set);
967
6b96018b
AB
968static long sock_do_ioctl(struct net *net, struct socket *sock,
969 unsigned int cmd, unsigned long arg)
970{
971 int err;
972 void __user *argp = (void __user *)arg;
973
974 err = sock->ops->ioctl(sock, cmd, arg);
975
976 /*
977 * If this ioctl is unknown try to hand it down
978 * to the NIC driver.
979 */
980 if (err == -ENOIOCTLCMD)
981 err = dev_ioctl(net, cmd, argp);
982
983 return err;
984}
985
1da177e4
LT
986/*
987 * With an ioctl, arg may well be a user mode pointer, but we don't know
988 * what to do with it - that's up to the protocol still.
989 */
990
c62cce2c
AV
991static struct ns_common *get_net_ns(struct ns_common *ns)
992{
993 return &get_net(container_of(ns, struct net, ns))->ns;
994}
995
1da177e4
LT
996static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
997{
998 struct socket *sock;
881d966b 999 struct sock *sk;
1da177e4
LT
1000 void __user *argp = (void __user *)arg;
1001 int pid, err;
881d966b 1002 struct net *net;
1da177e4 1003
b69aee04 1004 sock = file->private_data;
881d966b 1005 sk = sock->sk;
3b1e0a65 1006 net = sock_net(sk);
1da177e4 1007 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 1008 err = dev_ioctl(net, cmd, argp);
1da177e4 1009 } else
3d23e349 1010#ifdef CONFIG_WEXT_CORE
1da177e4 1011 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 1012 err = dev_ioctl(net, cmd, argp);
1da177e4 1013 } else
3d23e349 1014#endif
89bddce5 1015 switch (cmd) {
1da177e4
LT
1016 case FIOSETOWN:
1017 case SIOCSPGRP:
1018 err = -EFAULT;
1019 if (get_user(pid, (int __user *)argp))
1020 break;
393cc3f5 1021 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1022 break;
1023 case FIOGETOWN:
1024 case SIOCGPGRP:
609d7fa9 1025 err = put_user(f_getown(sock->file),
89bddce5 1026 (int __user *)argp);
1da177e4
LT
1027 break;
1028 case SIOCGIFBR:
1029 case SIOCSIFBR:
1030 case SIOCBRADDBR:
1031 case SIOCBRDELBR:
1032 err = -ENOPKG;
1033 if (!br_ioctl_hook)
1034 request_module("bridge");
1035
4a3e2f71 1036 mutex_lock(&br_ioctl_mutex);
89bddce5 1037 if (br_ioctl_hook)
881d966b 1038 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1039 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1040 break;
1041 case SIOCGIFVLAN:
1042 case SIOCSIFVLAN:
1043 err = -ENOPKG;
1044 if (!vlan_ioctl_hook)
1045 request_module("8021q");
1046
4a3e2f71 1047 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1048 if (vlan_ioctl_hook)
881d966b 1049 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1050 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1051 break;
1da177e4
LT
1052 case SIOCADDDLCI:
1053 case SIOCDELDLCI:
1054 err = -ENOPKG;
1055 if (!dlci_ioctl_hook)
1056 request_module("dlci");
1057
7512cbf6
PE
1058 mutex_lock(&dlci_ioctl_mutex);
1059 if (dlci_ioctl_hook)
1da177e4 1060 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1061 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 1062 break;
c62cce2c
AV
1063 case SIOCGSKNS:
1064 err = -EPERM;
1065 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1066 break;
1067
1068 err = open_related_ns(&net->ns, get_net_ns);
1069 break;
1da177e4 1070 default:
6b96018b 1071 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1072 break;
89bddce5 1073 }
1da177e4
LT
1074 return err;
1075}
1076
1077int sock_create_lite(int family, int type, int protocol, struct socket **res)
1078{
1079 int err;
1080 struct socket *sock = NULL;
89bddce5 1081
1da177e4
LT
1082 err = security_socket_create(family, type, protocol, 1);
1083 if (err)
1084 goto out;
1085
1086 sock = sock_alloc();
1087 if (!sock) {
1088 err = -ENOMEM;
1089 goto out;
1090 }
1091
1da177e4 1092 sock->type = type;
7420ed23
VY
1093 err = security_socket_post_create(sock, family, type, protocol, 1);
1094 if (err)
1095 goto out_release;
1096
1da177e4
LT
1097out:
1098 *res = sock;
1099 return err;
7420ed23
VY
1100out_release:
1101 sock_release(sock);
1102 sock = NULL;
1103 goto out;
1da177e4 1104}
c6d409cf 1105EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1106
1107/* No kernel lock held - perfect */
89bddce5 1108static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 1109{
cbf55001 1110 unsigned int busy_flag = 0;
1da177e4
LT
1111 struct socket *sock;
1112
1113 /*
89bddce5 1114 * We can't return errors to poll, so it's either yes or no.
1da177e4 1115 */
b69aee04 1116 sock = file->private_data;
2d48d67f 1117
cbf55001 1118 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1119 /* this socket can poll_ll so tell the system call */
cbf55001 1120 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1121
1122 /* once, only if requested by syscall */
cbf55001
ET
1123 if (wait && (wait->_key & POLL_BUSY_LOOP))
1124 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1125 }
1126
cbf55001 1127 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1128}
1129
89bddce5 1130static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1131{
b69aee04 1132 struct socket *sock = file->private_data;
1da177e4
LT
1133
1134 return sock->ops->mmap(file, sock, vma);
1135}
1136
/* release handler for socket files: release the inode's socket. */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1142
1143/*
1144 * Update the socket async list
1145 *
1146 * Fasync_list locking strategy.
1147 *
1148 * 1. fasync_list is modified only under process context socket lock
1149 * i.e. under semaphore.
1150 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1151 * or under socket lock
1da177e4
LT
1152 */
1153
1154static int sock_fasync(int fd, struct file *filp, int on)
1155{
989a2979
ED
1156 struct socket *sock = filp->private_data;
1157 struct sock *sk = sock->sk;
eaefd110 1158 struct socket_wq *wq;
1da177e4 1159
989a2979 1160 if (sk == NULL)
1da177e4 1161 return -EINVAL;
1da177e4
LT
1162
1163 lock_sock(sk);
1e1d04e6 1164 wq = rcu_dereference_protected(sock->wq, lockdep_sock_is_held(sk));
eaefd110 1165 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1166
eaefd110 1167 if (!wq->fasync_list)
989a2979
ED
1168 sock_reset_flag(sk, SOCK_FASYNC);
1169 else
bcdce719 1170 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1171
989a2979 1172 release_sock(sk);
1da177e4
LT
1173 return 0;
1174}
1175
ceb5d58b 1176/* This function may be called only under rcu_lock */
1da177e4 1177
ceb5d58b 1178int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1179{
ceb5d58b 1180 if (!wq || !wq->fasync_list)
1da177e4 1181 return -1;
ceb5d58b 1182
89bddce5 1183 switch (how) {
8d8ad9d7 1184 case SOCK_WAKE_WAITD:
ceb5d58b 1185 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1186 break;
1187 goto call_kill;
8d8ad9d7 1188 case SOCK_WAKE_SPACE:
ceb5d58b 1189 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1190 break;
1191 /* fall through */
8d8ad9d7 1192 case SOCK_WAKE_IO:
89bddce5 1193call_kill:
43815482 1194 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1195 break;
8d8ad9d7 1196 case SOCK_WAKE_URG:
43815482 1197 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1198 }
ceb5d58b 1199
1da177e4
LT
1200 return 0;
1201}
c6d409cf 1202EXPORT_SYMBOL(sock_wake_async);
1da177e4 1203
721db93a 1204int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1205 struct socket **res, int kern)
1da177e4
LT
1206{
1207 int err;
1208 struct socket *sock;
55737fda 1209 const struct net_proto_family *pf;
1da177e4
LT
1210
1211 /*
89bddce5 1212 * Check protocol is in range
1da177e4
LT
1213 */
1214 if (family < 0 || family >= NPROTO)
1215 return -EAFNOSUPPORT;
1216 if (type < 0 || type >= SOCK_MAX)
1217 return -EINVAL;
1218
1219 /* Compatibility.
1220
1221 This uglymoron is moved from INET layer to here to avoid
1222 deadlock in module load.
1223 */
1224 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1225 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1226 current->comm);
1da177e4
LT
1227 family = PF_PACKET;
1228 }
1229
1230 err = security_socket_create(family, type, protocol, kern);
1231 if (err)
1232 return err;
89bddce5 1233
55737fda
SH
1234 /*
1235 * Allocate the socket and allow the family to set things up. if
1236 * the protocol is 0, the family is instructed to select an appropriate
1237 * default.
1238 */
1239 sock = sock_alloc();
1240 if (!sock) {
e87cc472 1241 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1242 return -ENFILE; /* Not exactly a match, but its the
1243 closest posix thing */
1244 }
1245
1246 sock->type = type;
1247
95a5afca 1248#ifdef CONFIG_MODULES
89bddce5
SH
1249 /* Attempt to load a protocol module if the find failed.
1250 *
1251 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1252 * requested real, full-featured networking support upon configuration.
1253 * Otherwise module support will break!
1254 */
190683a9 1255 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1256 request_module("net-pf-%d", family);
1da177e4
LT
1257#endif
1258
55737fda
SH
1259 rcu_read_lock();
1260 pf = rcu_dereference(net_families[family]);
1261 err = -EAFNOSUPPORT;
1262 if (!pf)
1263 goto out_release;
1da177e4
LT
1264
1265 /*
1266 * We will call the ->create function, that possibly is in a loadable
1267 * module, so we have to bump that loadable module refcnt first.
1268 */
55737fda 1269 if (!try_module_get(pf->owner))
1da177e4
LT
1270 goto out_release;
1271
55737fda
SH
1272 /* Now protected by module ref count */
1273 rcu_read_unlock();
1274
3f378b68 1275 err = pf->create(net, sock, protocol, kern);
55737fda 1276 if (err < 0)
1da177e4 1277 goto out_module_put;
a79af59e 1278
1da177e4
LT
1279 /*
1280 * Now to bump the refcnt of the [loadable] module that owns this
1281 * socket at sock_release time we decrement its refcnt.
1282 */
55737fda
SH
1283 if (!try_module_get(sock->ops->owner))
1284 goto out_module_busy;
1285
1da177e4
LT
1286 /*
1287 * Now that we're done with the ->create function, the [loadable]
1288 * module can have its refcnt decremented
1289 */
55737fda 1290 module_put(pf->owner);
7420ed23
VY
1291 err = security_socket_post_create(sock, family, type, protocol, kern);
1292 if (err)
3b185525 1293 goto out_sock_release;
55737fda 1294 *res = sock;
1da177e4 1295
55737fda
SH
1296 return 0;
1297
1298out_module_busy:
1299 err = -EAFNOSUPPORT;
1da177e4 1300out_module_put:
55737fda
SH
1301 sock->ops = NULL;
1302 module_put(pf->owner);
1303out_sock_release:
1da177e4 1304 sock_release(sock);
55737fda
SH
1305 return err;
1306
1307out_release:
1308 rcu_read_unlock();
1309 goto out_sock_release;
1da177e4 1310}
721db93a 1311EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1312
1313int sock_create(int family, int type, int protocol, struct socket **res)
1314{
1b8d7ae4 1315 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1316}
c6d409cf 1317EXPORT_SYMBOL(sock_create);
1da177e4 1318
/*
 * sock_create_kern - create a socket in @net with kern == 1.
 * See __sock_create() for parameters and return value.
 */
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	const int kern = 1;

	return __sock_create(net, family, type, protocol, res, kern);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1324
3e0fa65f 1325SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1326{
1327 int retval;
1328 struct socket *sock;
a677a039
UD
1329 int flags;
1330
e38b36f3
UD
1331 /* Check the SOCK_* constants for consistency. */
1332 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1333 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1334 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1335 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1336
a677a039 1337 flags = type & ~SOCK_TYPE_MASK;
77d27200 1338 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1339 return -EINVAL;
1340 type &= SOCK_TYPE_MASK;
1da177e4 1341
aaca0bdc
UD
1342 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1343 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1344
1da177e4
LT
1345 retval = sock_create(family, type, protocol, &sock);
1346 if (retval < 0)
1347 goto out;
1348
77d27200 1349 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1350 if (retval < 0)
1351 goto out_release;
1352
1353out:
1354 /* It may be already another descriptor 8) Not kernel problem. */
1355 return retval;
1356
1357out_release:
1358 sock_release(sock);
1359 return retval;
1360}
1361
1362/*
1363 * Create a pair of connected sockets.
1364 */
1365
3e0fa65f
HC
1366SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1367 int __user *, usockvec)
1da177e4
LT
1368{
1369 struct socket *sock1, *sock2;
1370 int fd1, fd2, err;
db349509 1371 struct file *newfile1, *newfile2;
a677a039
UD
1372 int flags;
1373
1374 flags = type & ~SOCK_TYPE_MASK;
77d27200 1375 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1376 return -EINVAL;
1377 type &= SOCK_TYPE_MASK;
1da177e4 1378
aaca0bdc
UD
1379 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1380 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1381
1da177e4
LT
1382 /*
1383 * Obtain the first socket and check if the underlying protocol
1384 * supports the socketpair call.
1385 */
1386
1387 err = sock_create(family, type, protocol, &sock1);
1388 if (err < 0)
1389 goto out;
1390
1391 err = sock_create(family, type, protocol, &sock2);
1392 if (err < 0)
1393 goto out_release_1;
1394
1395 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1396 if (err < 0)
1da177e4
LT
1397 goto out_release_both;
1398
28407630 1399 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1400 if (unlikely(fd1 < 0)) {
1401 err = fd1;
db349509 1402 goto out_release_both;
bf3c23d1 1403 }
d73aa286 1404
28407630 1405 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1406 if (unlikely(fd2 < 0)) {
1407 err = fd2;
d73aa286 1408 goto out_put_unused_1;
28407630
AV
1409 }
1410
aab174f0 1411 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1412 if (IS_ERR(newfile1)) {
28407630 1413 err = PTR_ERR(newfile1);
d73aa286 1414 goto out_put_unused_both;
28407630
AV
1415 }
1416
aab174f0 1417 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1418 if (IS_ERR(newfile2)) {
1419 err = PTR_ERR(newfile2);
d73aa286 1420 goto out_fput_1;
db349509
AV
1421 }
1422
d73aa286
YD
1423 err = put_user(fd1, &usockvec[0]);
1424 if (err)
1425 goto out_fput_both;
1426
1427 err = put_user(fd2, &usockvec[1]);
1428 if (err)
1429 goto out_fput_both;
1430
157cf649 1431 audit_fd_pair(fd1, fd2);
d73aa286 1432
db349509
AV
1433 fd_install(fd1, newfile1);
1434 fd_install(fd2, newfile2);
1da177e4
LT
1435 /* fd1 and fd2 may be already another descriptors.
1436 * Not kernel problem.
1437 */
1438
d73aa286 1439 return 0;
1da177e4 1440
d73aa286
YD
1441out_fput_both:
1442 fput(newfile2);
1443 fput(newfile1);
1444 put_unused_fd(fd2);
1445 put_unused_fd(fd1);
1446 goto out;
1447
1448out_fput_1:
1449 fput(newfile1);
1450 put_unused_fd(fd2);
1451 put_unused_fd(fd1);
1452 sock_release(sock2);
1453 goto out;
1da177e4 1454
d73aa286
YD
1455out_put_unused_both:
1456 put_unused_fd(fd2);
1457out_put_unused_1:
1458 put_unused_fd(fd1);
1da177e4 1459out_release_both:
89bddce5 1460 sock_release(sock2);
1da177e4 1461out_release_1:
89bddce5 1462 sock_release(sock1);
1da177e4
LT
1463out:
1464 return err;
1465}
1466
1da177e4
LT
1467/*
1468 * Bind a name to a socket. Nothing much to do here since it's
1469 * the protocol's responsibility to handle the local address.
1470 *
1471 * We move the socket address to kernel space before we call
1472 * the protocol layer (having also checked the address is ok).
1473 */
1474
20f37034 1475SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1476{
1477 struct socket *sock;
230b1839 1478 struct sockaddr_storage address;
6cb153ca 1479 int err, fput_needed;
1da177e4 1480
89bddce5 1481 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1482 if (sock) {
43db362d 1483 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1484 if (err >= 0) {
1485 err = security_socket_bind(sock,
230b1839 1486 (struct sockaddr *)&address,
89bddce5 1487 addrlen);
6cb153ca
BL
1488 if (!err)
1489 err = sock->ops->bind(sock,
89bddce5 1490 (struct sockaddr *)
230b1839 1491 &address, addrlen);
1da177e4 1492 }
6cb153ca 1493 fput_light(sock->file, fput_needed);
89bddce5 1494 }
1da177e4
LT
1495 return err;
1496}
1497
1da177e4
LT
1498/*
1499 * Perform a listen. Basically, we allow the protocol to do anything
1500 * necessary for a listen, and if that works, we mark the socket as
1501 * ready for listening.
1502 */
1503
3e0fa65f 1504SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1505{
1506 struct socket *sock;
6cb153ca 1507 int err, fput_needed;
b8e1f9b5 1508 int somaxconn;
89bddce5
SH
1509
1510 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1511 if (sock) {
8efa6e93 1512 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1513 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1514 backlog = somaxconn;
1da177e4
LT
1515
1516 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1517 if (!err)
1518 err = sock->ops->listen(sock, backlog);
1da177e4 1519
6cb153ca 1520 fput_light(sock->file, fput_needed);
1da177e4
LT
1521 }
1522 return err;
1523}
1524
1da177e4
LT
1525/*
1526 * For accept, we attempt to create a new socket, set up the link
1527 * with the client, wake up the client, then return the new
1528 * connected fd. We collect the address of the connector in kernel
1529 * space and move it to user at the very end. This is unclean because
1530 * we open the socket then return an error.
1531 *
1532 * 1003.1g adds the ability to recvmsg() to query connection pending
1533 * status to recvmsg. We need to add that support in a way that's
1534 * clean when we restructure accept also.
1535 */
1536
20f37034
HC
1537SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1538 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1539{
1540 struct socket *sock, *newsock;
39d8c1b6 1541 struct file *newfile;
6cb153ca 1542 int err, len, newfd, fput_needed;
230b1839 1543 struct sockaddr_storage address;
1da177e4 1544
77d27200 1545 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1546 return -EINVAL;
1547
1548 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1549 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1550
6cb153ca 1551 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1552 if (!sock)
1553 goto out;
1554
1555 err = -ENFILE;
c6d409cf
ED
1556 newsock = sock_alloc();
1557 if (!newsock)
1da177e4
LT
1558 goto out_put;
1559
1560 newsock->type = sock->type;
1561 newsock->ops = sock->ops;
1562
1da177e4
LT
1563 /*
1564 * We don't need try_module_get here, as the listening socket (sock)
1565 * has the protocol module (sock->ops->owner) held.
1566 */
1567 __module_get(newsock->ops->owner);
1568
28407630 1569 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1570 if (unlikely(newfd < 0)) {
1571 err = newfd;
9a1875e6
DM
1572 sock_release(newsock);
1573 goto out_put;
39d8c1b6 1574 }
aab174f0 1575 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1576 if (IS_ERR(newfile)) {
28407630
AV
1577 err = PTR_ERR(newfile);
1578 put_unused_fd(newfd);
1579 sock_release(newsock);
1580 goto out_put;
1581 }
39d8c1b6 1582
a79af59e
FF
1583 err = security_socket_accept(sock, newsock);
1584 if (err)
39d8c1b6 1585 goto out_fd;
a79af59e 1586
cdfbabfb 1587 err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
1da177e4 1588 if (err < 0)
39d8c1b6 1589 goto out_fd;
1da177e4
LT
1590
1591 if (upeer_sockaddr) {
230b1839 1592 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1593 &len, 2) < 0) {
1da177e4 1594 err = -ECONNABORTED;
39d8c1b6 1595 goto out_fd;
1da177e4 1596 }
43db362d 1597 err = move_addr_to_user(&address,
230b1839 1598 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1599 if (err < 0)
39d8c1b6 1600 goto out_fd;
1da177e4
LT
1601 }
1602
1603 /* File flags are not inherited via accept() unlike another OSes. */
1604
39d8c1b6
DM
1605 fd_install(newfd, newfile);
1606 err = newfd;
1da177e4 1607
1da177e4 1608out_put:
6cb153ca 1609 fput_light(sock->file, fput_needed);
1da177e4
LT
1610out:
1611 return err;
39d8c1b6 1612out_fd:
9606a216 1613 fput(newfile);
39d8c1b6 1614 put_unused_fd(newfd);
1da177e4
LT
1615 goto out_put;
1616}
1617
20f37034
HC
1618SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1619 int __user *, upeer_addrlen)
aaca0bdc 1620{
de11defe 1621 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1622}
1623
1da177e4
LT
1624/*
1625 * Attempt to connect to a socket with the server address. The address
1626 * is in user space so we verify it is OK and move it to kernel space.
1627 *
1628 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1629 * break bindings
1630 *
1631 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1632 * other SEQPACKET protocols that take time to connect() as it doesn't
1633 * include the -EINPROGRESS status for such sockets.
1634 */
1635
20f37034
HC
1636SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1637 int, addrlen)
1da177e4
LT
1638{
1639 struct socket *sock;
230b1839 1640 struct sockaddr_storage address;
6cb153ca 1641 int err, fput_needed;
1da177e4 1642
6cb153ca 1643 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1644 if (!sock)
1645 goto out;
43db362d 1646 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1647 if (err < 0)
1648 goto out_put;
1649
89bddce5 1650 err =
230b1839 1651 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1652 if (err)
1653 goto out_put;
1654
230b1839 1655 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1656 sock->file->f_flags);
1657out_put:
6cb153ca 1658 fput_light(sock->file, fput_needed);
1da177e4
LT
1659out:
1660 return err;
1661}
1662
1663/*
1664 * Get the local address ('name') of a socket object. Move the obtained
1665 * name to user space.
1666 */
1667
20f37034
HC
1668SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1669 int __user *, usockaddr_len)
1da177e4
LT
1670{
1671 struct socket *sock;
230b1839 1672 struct sockaddr_storage address;
6cb153ca 1673 int len, err, fput_needed;
89bddce5 1674
6cb153ca 1675 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1676 if (!sock)
1677 goto out;
1678
1679 err = security_socket_getsockname(sock);
1680 if (err)
1681 goto out_put;
1682
230b1839 1683 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1684 if (err)
1685 goto out_put;
43db362d 1686 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1687
1688out_put:
6cb153ca 1689 fput_light(sock->file, fput_needed);
1da177e4
LT
1690out:
1691 return err;
1692}
1693
1694/*
1695 * Get the remote address ('name') of a socket object. Move the obtained
1696 * name to user space.
1697 */
1698
20f37034
HC
1699SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1700 int __user *, usockaddr_len)
1da177e4
LT
1701{
1702 struct socket *sock;
230b1839 1703 struct sockaddr_storage address;
6cb153ca 1704 int len, err, fput_needed;
1da177e4 1705
89bddce5
SH
1706 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1707 if (sock != NULL) {
1da177e4
LT
1708 err = security_socket_getpeername(sock);
1709 if (err) {
6cb153ca 1710 fput_light(sock->file, fput_needed);
1da177e4
LT
1711 return err;
1712 }
1713
89bddce5 1714 err =
230b1839 1715 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1716 1);
1da177e4 1717 if (!err)
43db362d 1718 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1719 usockaddr_len);
6cb153ca 1720 fput_light(sock->file, fput_needed);
1da177e4
LT
1721 }
1722 return err;
1723}
1724
1725/*
1726 * Send a datagram to a given address. We move the address into kernel
1727 * space and check the user space data area is readable before invoking
1728 * the protocol.
1729 */
1730
3e0fa65f 1731SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1732 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1733 int, addr_len)
1da177e4
LT
1734{
1735 struct socket *sock;
230b1839 1736 struct sockaddr_storage address;
1da177e4
LT
1737 int err;
1738 struct msghdr msg;
1739 struct iovec iov;
6cb153ca 1740 int fput_needed;
6cb153ca 1741
602bd0e9
AV
1742 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1743 if (unlikely(err))
1744 return err;
de0fa95c
PE
1745 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1746 if (!sock)
4387ff75 1747 goto out;
6cb153ca 1748
89bddce5 1749 msg.msg_name = NULL;
89bddce5
SH
1750 msg.msg_control = NULL;
1751 msg.msg_controllen = 0;
1752 msg.msg_namelen = 0;
6cb153ca 1753 if (addr) {
43db362d 1754 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1755 if (err < 0)
1756 goto out_put;
230b1839 1757 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1758 msg.msg_namelen = addr_len;
1da177e4
LT
1759 }
1760 if (sock->file->f_flags & O_NONBLOCK)
1761 flags |= MSG_DONTWAIT;
1762 msg.msg_flags = flags;
d8725c86 1763 err = sock_sendmsg(sock, &msg);
1da177e4 1764
89bddce5 1765out_put:
de0fa95c 1766 fput_light(sock->file, fput_needed);
4387ff75 1767out:
1da177e4
LT
1768 return err;
1769}
1770
1771/*
89bddce5 1772 * Send a datagram down a socket.
1da177e4
LT
1773 */
1774
3e0fa65f 1775SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1776 unsigned int, flags)
1da177e4
LT
1777{
1778 return sys_sendto(fd, buff, len, flags, NULL, 0);
1779}
1780
1781/*
89bddce5 1782 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1783 * sender. We verify the buffers are writable and if needed move the
1784 * sender address from kernel to user space.
1785 */
1786
3e0fa65f 1787SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1788 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1789 int __user *, addr_len)
1da177e4
LT
1790{
1791 struct socket *sock;
1792 struct iovec iov;
1793 struct msghdr msg;
230b1839 1794 struct sockaddr_storage address;
89bddce5 1795 int err, err2;
6cb153ca
BL
1796 int fput_needed;
1797
602bd0e9
AV
1798 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1799 if (unlikely(err))
1800 return err;
de0fa95c 1801 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1802 if (!sock)
de0fa95c 1803 goto out;
1da177e4 1804
89bddce5
SH
1805 msg.msg_control = NULL;
1806 msg.msg_controllen = 0;
f3d33426
HFS
1807 /* Save some cycles and don't copy the address if not needed */
1808 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1809 /* We assume all kernel code knows the size of sockaddr_storage */
1810 msg.msg_namelen = 0;
130ed5d1 1811 msg.msg_iocb = NULL;
9f138fa6 1812 msg.msg_flags = 0;
1da177e4
LT
1813 if (sock->file->f_flags & O_NONBLOCK)
1814 flags |= MSG_DONTWAIT;
2da62906 1815 err = sock_recvmsg(sock, &msg, flags);
1da177e4 1816
89bddce5 1817 if (err >= 0 && addr != NULL) {
43db362d 1818 err2 = move_addr_to_user(&address,
230b1839 1819 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1820 if (err2 < 0)
1821 err = err2;
1da177e4 1822 }
de0fa95c
PE
1823
1824 fput_light(sock->file, fput_needed);
4387ff75 1825out:
1da177e4
LT
1826 return err;
1827}
1828
1829/*
89bddce5 1830 * Receive a datagram from a socket.
1da177e4
LT
1831 */
1832
b7c0ddf5
JG
1833SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1834 unsigned int, flags)
1da177e4
LT
1835{
1836 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1837}
1838
1839/*
1840 * Set a socket option. Because we don't know the option lengths we have
1841 * to pass the user mode parameter for the protocols to sort out.
1842 */
1843
20f37034
HC
1844SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1845 char __user *, optval, int, optlen)
1da177e4 1846{
6cb153ca 1847 int err, fput_needed;
1da177e4
LT
1848 struct socket *sock;
1849
1850 if (optlen < 0)
1851 return -EINVAL;
89bddce5
SH
1852
1853 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1854 if (sock != NULL) {
1855 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1856 if (err)
1857 goto out_put;
1da177e4
LT
1858
1859 if (level == SOL_SOCKET)
89bddce5
SH
1860 err =
1861 sock_setsockopt(sock, level, optname, optval,
1862 optlen);
1da177e4 1863 else
89bddce5
SH
1864 err =
1865 sock->ops->setsockopt(sock, level, optname, optval,
1866 optlen);
6cb153ca
BL
1867out_put:
1868 fput_light(sock->file, fput_needed);
1da177e4
LT
1869 }
1870 return err;
1871}
1872
1873/*
1874 * Get a socket option. Because we don't know the option lengths we have
1875 * to pass a user mode parameter for the protocols to sort out.
1876 */
1877
20f37034
HC
1878SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1879 char __user *, optval, int __user *, optlen)
1da177e4 1880{
6cb153ca 1881 int err, fput_needed;
1da177e4
LT
1882 struct socket *sock;
1883
89bddce5
SH
1884 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1885 if (sock != NULL) {
6cb153ca
BL
1886 err = security_socket_getsockopt(sock, level, optname);
1887 if (err)
1888 goto out_put;
1da177e4
LT
1889
1890 if (level == SOL_SOCKET)
89bddce5
SH
1891 err =
1892 sock_getsockopt(sock, level, optname, optval,
1893 optlen);
1da177e4 1894 else
89bddce5
SH
1895 err =
1896 sock->ops->getsockopt(sock, level, optname, optval,
1897 optlen);
6cb153ca
BL
1898out_put:
1899 fput_light(sock->file, fput_needed);
1da177e4
LT
1900 }
1901 return err;
1902}
1903
1da177e4
LT
1904/*
1905 * Shutdown a socket.
1906 */
1907
754fe8d2 1908SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1909{
6cb153ca 1910 int err, fput_needed;
1da177e4
LT
1911 struct socket *sock;
1912
89bddce5
SH
1913 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1914 if (sock != NULL) {
1da177e4 1915 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1916 if (!err)
1917 err = sock->ops->shutdown(sock, how);
1918 fput_light(sock->file, fput_needed);
1da177e4
LT
1919 }
1920 return err;
1921}
1922
89bddce5 1923/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
1924 * fields which are the same type (int / unsigned) on our platforms.
1925 */
/*
 * COMPAT_MSG() picks &msg##_compat->member when MSG_CMSG_COMPAT is set in
 * `flags`, and &msg->member otherwise.  It is unhygienic: a variable named
 * `flags` and both the `msg` and `msg##_compat` pointers must be in scope
 * wherever it is expanded.
 */
#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
1929
c71d8ebe
TH
/*
 * A socket address together with its length.
 * NOTE(review): presumably records the destination used by an earlier
 * send so it can be compared later - confirm against ___sys_sendmsg().
 */
struct used_address {
	struct sockaddr_storage name;	/* address bytes */
	unsigned int name_len;		/* presumably valid bytes in @name - confirm */
};
1934
da184284
AV
1935static int copy_msghdr_from_user(struct msghdr *kmsg,
1936 struct user_msghdr __user *umsg,
1937 struct sockaddr __user **save_addr,
1938 struct iovec **iov)
1661bf36 1939{
ffb07550 1940 struct user_msghdr msg;
08adb7da
AV
1941 ssize_t err;
1942
ffb07550 1943 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 1944 return -EFAULT;
dbb490b9 1945
864d9664 1946 kmsg->msg_control = (void __force *)msg.msg_control;
ffb07550
AV
1947 kmsg->msg_controllen = msg.msg_controllen;
1948 kmsg->msg_flags = msg.msg_flags;
1949
1950 kmsg->msg_namelen = msg.msg_namelen;
1951 if (!msg.msg_name)
6a2a2b3a
AS
1952 kmsg->msg_namelen = 0;
1953
dbb490b9
ML
1954 if (kmsg->msg_namelen < 0)
1955 return -EINVAL;
1956
1661bf36 1957 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1958 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
1959
1960 if (save_addr)
ffb07550 1961 *save_addr = msg.msg_name;
08adb7da 1962
ffb07550 1963 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 1964 if (!save_addr) {
864d9664
PA
1965 err = move_addr_to_kernel(msg.msg_name,
1966 kmsg->msg_namelen,
08adb7da
AV
1967 kmsg->msg_name);
1968 if (err < 0)
1969 return err;
1970 }
1971 } else {
1972 kmsg->msg_name = NULL;
1973 kmsg->msg_namelen = 0;
1974 }
1975
ffb07550 1976 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
1977 return -EMSGSIZE;
1978
0345f931 1979 kmsg->msg_iocb = NULL;
1980
ffb07550
AV
1981 return import_iovec(save_addr ? READ : WRITE,
1982 msg.msg_iov, msg.msg_iovlen,
da184284 1983 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
1984}
1985
666547ff 1986static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 1987 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
1988 struct used_address *used_address,
1989 unsigned int allowed_msghdr_flags)
1da177e4 1990{
89bddce5
SH
1991 struct compat_msghdr __user *msg_compat =
1992 (struct compat_msghdr __user *)msg;
230b1839 1993 struct sockaddr_storage address;
1da177e4 1994 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1995 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 1996 __aligned(sizeof(__kernel_size_t));
89bddce5 1997 /* 20 is size of ipv6_pktinfo */
1da177e4 1998 unsigned char *ctl_buf = ctl;
d8725c86 1999 int ctl_len;
08adb7da 2000 ssize_t err;
89bddce5 2001
08adb7da 2002 msg_sys->msg_name = &address;
1da177e4 2003
08449320 2004 if (MSG_CMSG_COMPAT & flags)
08adb7da 2005 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 2006 else
08adb7da 2007 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 2008 if (err < 0)
da184284 2009 return err;
1da177e4
LT
2010
2011 err = -ENOBUFS;
2012
228e548e 2013 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2014 goto out_freeiov;
28a94d8f 2015 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2016 ctl_len = msg_sys->msg_controllen;
1da177e4 2017 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2018 err =
228e548e 2019 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2020 sizeof(ctl));
1da177e4
LT
2021 if (err)
2022 goto out_freeiov;
228e548e
AB
2023 ctl_buf = msg_sys->msg_control;
2024 ctl_len = msg_sys->msg_controllen;
1da177e4 2025 } else if (ctl_len) {
ac4340fc
DM
2026 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2027 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2028 if (ctl_len > sizeof(ctl)) {
1da177e4 2029 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2030 if (ctl_buf == NULL)
1da177e4
LT
2031 goto out_freeiov;
2032 }
2033 err = -EFAULT;
2034 /*
228e548e 2035 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2036 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2037 * checking falls down on this.
2038 */
fb8621bb 2039 if (copy_from_user(ctl_buf,
228e548e 2040 (void __user __force *)msg_sys->msg_control,
89bddce5 2041 ctl_len))
1da177e4 2042 goto out_freectl;
228e548e 2043 msg_sys->msg_control = ctl_buf;
1da177e4 2044 }
228e548e 2045 msg_sys->msg_flags = flags;
1da177e4
LT
2046
2047 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2048 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2049 /*
2050 * If this is sendmmsg() and current destination address is same as
2051 * previously succeeded address, omit asking LSM's decision.
2052 * used_address->name_len is initialized to UINT_MAX so that the first
2053 * destination address never matches.
2054 */
bc909d9d
MD
2055 if (used_address && msg_sys->msg_name &&
2056 used_address->name_len == msg_sys->msg_namelen &&
2057 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2058 used_address->name_len)) {
d8725c86 2059 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2060 goto out_freectl;
2061 }
d8725c86 2062 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2063 /*
2064 * If this is sendmmsg() and sending to current destination address was
2065 * successful, remember it.
2066 */
2067 if (used_address && err >= 0) {
2068 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2069 if (msg_sys->msg_name)
2070 memcpy(&used_address->name, msg_sys->msg_name,
2071 used_address->name_len);
c71d8ebe 2072 }
1da177e4
LT
2073
2074out_freectl:
89bddce5 2075 if (ctl_buf != ctl)
1da177e4
LT
2076 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2077out_freeiov:
da184284 2078 kfree(iov);
228e548e
AB
2079 return err;
2080}
2081
2082/*
2083 * BSD sendmsg interface
2084 */
2085
666547ff 2086long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
228e548e
AB
2087{
2088 int fput_needed, err;
2089 struct msghdr msg_sys;
1be374a0
AL
2090 struct socket *sock;
2091
1be374a0 2092 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2093 if (!sock)
2094 goto out;
2095
28a94d8f 2096 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2097
6cb153ca 2098 fput_light(sock->file, fput_needed);
89bddce5 2099out:
1da177e4
LT
2100 return err;
2101}
2102
666547ff 2103SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5
AL
2104{
2105 if (flags & MSG_CMSG_COMPAT)
2106 return -EINVAL;
2107 return __sys_sendmsg(fd, msg, flags);
2108}
2109
228e548e
AB
2110/*
2111 * Linux sendmmsg interface
2112 */
2113
2114int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2115 unsigned int flags)
2116{
2117 int fput_needed, err, datagrams;
2118 struct socket *sock;
2119 struct mmsghdr __user *entry;
2120 struct compat_mmsghdr __user *compat_entry;
2121 struct msghdr msg_sys;
c71d8ebe 2122 struct used_address used_address;
f092276d 2123 unsigned int oflags = flags;
228e548e 2124
98382f41
AB
2125 if (vlen > UIO_MAXIOV)
2126 vlen = UIO_MAXIOV;
228e548e
AB
2127
2128 datagrams = 0;
2129
2130 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2131 if (!sock)
2132 return err;
2133
c71d8ebe 2134 used_address.name_len = UINT_MAX;
228e548e
AB
2135 entry = mmsg;
2136 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2137 err = 0;
f092276d 2138 flags |= MSG_BATCH;
228e548e
AB
2139
2140 while (datagrams < vlen) {
f092276d
TH
2141 if (datagrams == vlen - 1)
2142 flags = oflags;
2143
228e548e 2144 if (MSG_CMSG_COMPAT & flags) {
666547ff 2145 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2146 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2147 if (err < 0)
2148 break;
2149 err = __put_user(err, &compat_entry->msg_len);
2150 ++compat_entry;
2151 } else {
a7526eb5 2152 err = ___sys_sendmsg(sock,
666547ff 2153 (struct user_msghdr __user *)entry,
28a94d8f 2154 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2155 if (err < 0)
2156 break;
2157 err = put_user(err, &entry->msg_len);
2158 ++entry;
2159 }
2160
2161 if (err)
2162 break;
2163 ++datagrams;
3023898b
SHY
2164 if (msg_data_left(&msg_sys))
2165 break;
a78cb84c 2166 cond_resched();
228e548e
AB
2167 }
2168
228e548e
AB
2169 fput_light(sock->file, fput_needed);
2170
728ffb86
AB
2171 /* We only return an error if no datagrams were able to be sent */
2172 if (datagrams != 0)
228e548e
AB
2173 return datagrams;
2174
228e548e
AB
2175 return err;
2176}
2177
2178SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2179 unsigned int, vlen, unsigned int, flags)
2180{
1be374a0
AL
2181 if (flags & MSG_CMSG_COMPAT)
2182 return -EINVAL;
228e548e
AB
2183 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2184}
2185
666547ff 2186static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2187 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2188{
89bddce5
SH
2189 struct compat_msghdr __user *msg_compat =
2190 (struct compat_msghdr __user *)msg;
1da177e4 2191 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2192 struct iovec *iov = iovstack;
1da177e4 2193 unsigned long cmsg_ptr;
2da62906 2194 int len;
08adb7da 2195 ssize_t err;
1da177e4
LT
2196
2197 /* kernel mode address */
230b1839 2198 struct sockaddr_storage addr;
1da177e4
LT
2199
2200 /* user mode address pointers */
2201 struct sockaddr __user *uaddr;
08adb7da 2202 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2203
08adb7da 2204 msg_sys->msg_name = &addr;
1da177e4 2205
f3d33426 2206 if (MSG_CMSG_COMPAT & flags)
08adb7da 2207 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2208 else
08adb7da 2209 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2210 if (err < 0)
da184284 2211 return err;
1da177e4 2212
a2e27255
ACM
2213 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2214 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2215
f3d33426
HFS
2216 /* We assume all kernel code knows the size of sockaddr_storage */
2217 msg_sys->msg_namelen = 0;
2218
1da177e4
LT
2219 if (sock->file->f_flags & O_NONBLOCK)
2220 flags |= MSG_DONTWAIT;
2da62906 2221 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2222 if (err < 0)
2223 goto out_freeiov;
2224 len = err;
2225
2226 if (uaddr != NULL) {
43db362d 2227 err = move_addr_to_user(&addr,
a2e27255 2228 msg_sys->msg_namelen, uaddr,
89bddce5 2229 uaddr_len);
1da177e4
LT
2230 if (err < 0)
2231 goto out_freeiov;
2232 }
a2e27255 2233 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2234 COMPAT_FLAGS(msg));
1da177e4
LT
2235 if (err)
2236 goto out_freeiov;
2237 if (MSG_CMSG_COMPAT & flags)
a2e27255 2238 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2239 &msg_compat->msg_controllen);
2240 else
a2e27255 2241 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2242 &msg->msg_controllen);
2243 if (err)
2244 goto out_freeiov;
2245 err = len;
2246
2247out_freeiov:
da184284 2248 kfree(iov);
a2e27255
ACM
2249 return err;
2250}
2251
2252/*
2253 * BSD recvmsg interface
2254 */
2255
666547ff 2256long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
a2e27255
ACM
2257{
2258 int fput_needed, err;
2259 struct msghdr msg_sys;
1be374a0
AL
2260 struct socket *sock;
2261
1be374a0 2262 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2263 if (!sock)
2264 goto out;
2265
a7526eb5 2266 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2267
6cb153ca 2268 fput_light(sock->file, fput_needed);
1da177e4
LT
2269out:
2270 return err;
2271}
2272
666547ff 2273SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2274 unsigned int, flags)
2275{
2276 if (flags & MSG_CMSG_COMPAT)
2277 return -EINVAL;
2278 return __sys_recvmsg(fd, msg, flags);
2279}
2280
a2e27255
ACM
2281/*
2282 * Linux recvmmsg interface
2283 */
2284
2285int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2286 unsigned int flags, struct timespec *timeout)
2287{
2288 int fput_needed, err, datagrams;
2289 struct socket *sock;
2290 struct mmsghdr __user *entry;
d7256d0e 2291 struct compat_mmsghdr __user *compat_entry;
a2e27255 2292 struct msghdr msg_sys;
766b9f92
DD
2293 struct timespec64 end_time;
2294 struct timespec64 timeout64;
a2e27255
ACM
2295
2296 if (timeout &&
2297 poll_select_set_timeout(&end_time, timeout->tv_sec,
2298 timeout->tv_nsec))
2299 return -EINVAL;
2300
2301 datagrams = 0;
2302
2303 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2304 if (!sock)
2305 return err;
2306
2307 err = sock_error(sock->sk);
e623a9e9
MJ
2308 if (err) {
2309 datagrams = err;
a2e27255 2310 goto out_put;
e623a9e9 2311 }
a2e27255
ACM
2312
2313 entry = mmsg;
d7256d0e 2314 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2315
2316 while (datagrams < vlen) {
2317 /*
2318 * No need to ask LSM for more than the first datagram.
2319 */
d7256d0e 2320 if (MSG_CMSG_COMPAT & flags) {
666547ff 2321 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2322 &msg_sys, flags & ~MSG_WAITFORONE,
2323 datagrams);
d7256d0e
JMG
2324 if (err < 0)
2325 break;
2326 err = __put_user(err, &compat_entry->msg_len);
2327 ++compat_entry;
2328 } else {
a7526eb5 2329 err = ___sys_recvmsg(sock,
666547ff 2330 (struct user_msghdr __user *)entry,
a7526eb5
AL
2331 &msg_sys, flags & ~MSG_WAITFORONE,
2332 datagrams);
d7256d0e
JMG
2333 if (err < 0)
2334 break;
2335 err = put_user(err, &entry->msg_len);
2336 ++entry;
2337 }
2338
a2e27255
ACM
2339 if (err)
2340 break;
a2e27255
ACM
2341 ++datagrams;
2342
71c5c159
BB
2343 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2344 if (flags & MSG_WAITFORONE)
2345 flags |= MSG_DONTWAIT;
2346
a2e27255 2347 if (timeout) {
766b9f92
DD
2348 ktime_get_ts64(&timeout64);
2349 *timeout = timespec64_to_timespec(
2350 timespec64_sub(end_time, timeout64));
a2e27255
ACM
2351 if (timeout->tv_sec < 0) {
2352 timeout->tv_sec = timeout->tv_nsec = 0;
2353 break;
2354 }
2355
2356 /* Timeout, return less than vlen datagrams */
2357 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2358 break;
2359 }
2360
2361 /* Out of band data, return right away */
2362 if (msg_sys.msg_flags & MSG_OOB)
2363 break;
a78cb84c 2364 cond_resched();
a2e27255
ACM
2365 }
2366
a2e27255 2367 if (err == 0)
34b88a68
ACM
2368 goto out_put;
2369
2370 if (datagrams == 0) {
2371 datagrams = err;
2372 goto out_put;
2373 }
a2e27255 2374
34b88a68
ACM
2375 /*
2376 * We may return less entries than requested (vlen) if the
2377 * sock is non block and there aren't enough datagrams...
2378 */
2379 if (err != -EAGAIN) {
a2e27255 2380 /*
34b88a68
ACM
2381 * ... or if recvmsg returns an error after we
2382 * received some datagrams, where we record the
2383 * error to return on the next call or if the
2384 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2385 */
34b88a68 2386 sock->sk->sk_err = -err;
a2e27255 2387 }
34b88a68
ACM
2388out_put:
2389 fput_light(sock->file, fput_needed);
a2e27255 2390
34b88a68 2391 return datagrams;
a2e27255
ACM
2392}
2393
2394SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2395 unsigned int, vlen, unsigned int, flags,
2396 struct timespec __user *, timeout)
2397{
2398 int datagrams;
2399 struct timespec timeout_sys;
2400
1be374a0
AL
2401 if (flags & MSG_CMSG_COMPAT)
2402 return -EINVAL;
2403
a2e27255
ACM
2404 if (!timeout)
2405 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2406
2407 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2408 return -EFAULT;
2409
2410 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2411
2412 if (datagrams > 0 &&
2413 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2414 datagrams = -EFAULT;
2415
2416 return datagrams;
2417}
2418
2419#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall: nargs[call] is the number of
 * bytes of arguments to copy from user space for that call number.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), AL(3),
	AL(3), AL(4), AL(4), AL(4), AL(6), AL(6), AL(2),
	AL(5), AL(5), AL(3), AL(3), AL(4), AL(5), AL(4)
};

#undef AL
2430
2431/*
89bddce5 2432 * System call vectors.
1da177e4
LT
2433 *
2434 * Argument checking cleaned up. Saved 20% in size.
2435 * This function doesn't need to set the kernel lock because
89bddce5 2436 * it is set by the callees.
1da177e4
LT
2437 */
2438
3e0fa65f 2439SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2440{
2950fa9d 2441 unsigned long a[AUDITSC_ARGS];
89bddce5 2442 unsigned long a0, a1;
1da177e4 2443 int err;
47379052 2444 unsigned int len;
1da177e4 2445
228e548e 2446 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2447 return -EINVAL;
45c8178c 2448 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2449
47379052
AV
2450 len = nargs[call];
2451 if (len > sizeof(a))
2452 return -EINVAL;
2453
1da177e4 2454 /* copy_from_user should be SMP safe. */
47379052 2455 if (copy_from_user(a, args, len))
1da177e4 2456 return -EFAULT;
3ec3b2fb 2457
2950fa9d
CG
2458 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2459 if (err)
2460 return err;
3ec3b2fb 2461
89bddce5
SH
2462 a0 = a[0];
2463 a1 = a[1];
2464
2465 switch (call) {
2466 case SYS_SOCKET:
2467 err = sys_socket(a0, a1, a[2]);
2468 break;
2469 case SYS_BIND:
2470 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2471 break;
2472 case SYS_CONNECT:
2473 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2474 break;
2475 case SYS_LISTEN:
2476 err = sys_listen(a0, a1);
2477 break;
2478 case SYS_ACCEPT:
de11defe
UD
2479 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2480 (int __user *)a[2], 0);
89bddce5
SH
2481 break;
2482 case SYS_GETSOCKNAME:
2483 err =
2484 sys_getsockname(a0, (struct sockaddr __user *)a1,
2485 (int __user *)a[2]);
2486 break;
2487 case SYS_GETPEERNAME:
2488 err =
2489 sys_getpeername(a0, (struct sockaddr __user *)a1,
2490 (int __user *)a[2]);
2491 break;
2492 case SYS_SOCKETPAIR:
2493 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2494 break;
2495 case SYS_SEND:
2496 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2497 break;
2498 case SYS_SENDTO:
2499 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2500 (struct sockaddr __user *)a[4], a[5]);
2501 break;
2502 case SYS_RECV:
2503 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2504 break;
2505 case SYS_RECVFROM:
2506 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2507 (struct sockaddr __user *)a[4],
2508 (int __user *)a[5]);
2509 break;
2510 case SYS_SHUTDOWN:
2511 err = sys_shutdown(a0, a1);
2512 break;
2513 case SYS_SETSOCKOPT:
2514 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2515 break;
2516 case SYS_GETSOCKOPT:
2517 err =
2518 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2519 (int __user *)a[4]);
2520 break;
2521 case SYS_SENDMSG:
666547ff 2522 err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2523 break;
228e548e
AB
2524 case SYS_SENDMMSG:
2525 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2526 break;
89bddce5 2527 case SYS_RECVMSG:
666547ff 2528 err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2529 break;
a2e27255
ACM
2530 case SYS_RECVMMSG:
2531 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2532 (struct timespec __user *)a[4]);
2533 break;
de11defe
UD
2534 case SYS_ACCEPT4:
2535 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2536 (int __user *)a[2], a[3]);
aaca0bdc 2537 break;
89bddce5
SH
2538 default:
2539 err = -EINVAL;
2540 break;
1da177e4
LT
2541 }
2542 return err;
2543}
2544
89bddce5 2545#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2546
55737fda
SH
2547/**
2548 * sock_register - add a socket protocol handler
2549 * @ops: description of protocol
2550 *
1da177e4
LT
2551 * This function is called by a protocol handler that wants to
2552 * advertise its address family, and have it linked into the
e793c0f7 2553 * socket interface. The value ops->family corresponds to the
55737fda 2554 * socket system call protocol family.
1da177e4 2555 */
f0fd27d4 2556int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2557{
2558 int err;
2559
2560 if (ops->family >= NPROTO) {
3410f22e 2561 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2562 return -ENOBUFS;
2563 }
55737fda
SH
2564
2565 spin_lock(&net_family_lock);
190683a9
ED
2566 if (rcu_dereference_protected(net_families[ops->family],
2567 lockdep_is_held(&net_family_lock)))
55737fda
SH
2568 err = -EEXIST;
2569 else {
cf778b00 2570 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2571 err = 0;
2572 }
55737fda
SH
2573 spin_unlock(&net_family_lock);
2574
3410f22e 2575 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2576 return err;
2577}
c6d409cf 2578EXPORT_SYMBOL(sock_register);
1da177e4 2579
55737fda
SH
2580/**
2581 * sock_unregister - remove a protocol handler
2582 * @family: protocol family to remove
2583 *
1da177e4
LT
2584 * This function is called by a protocol handler that wants to
2585 * remove its address family, and have it unlinked from the
55737fda
SH
2586 * new socket creation.
2587 *
2588 * If protocol handler is a module, then it can use module reference
2589 * counts to protect against new references. If protocol handler is not
2590 * a module then it needs to provide its own protection in
2591 * the ops->create routine.
1da177e4 2592 */
f0fd27d4 2593void sock_unregister(int family)
1da177e4 2594{
f0fd27d4 2595 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2596
55737fda 2597 spin_lock(&net_family_lock);
a9b3cd7f 2598 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2599 spin_unlock(&net_family_lock);
2600
2601 synchronize_rcu();
2602
3410f22e 2603 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2604}
c6d409cf 2605EXPORT_SYMBOL(sock_unregister);
1da177e4 2606
77d76ea3 2607static int __init sock_init(void)
1da177e4 2608{
b3e19d92 2609 int err;
2ca794e5
EB
2610 /*
2611 * Initialize the network sysctl infrastructure.
2612 */
2613 err = net_sysctl_init();
2614 if (err)
2615 goto out;
b3e19d92 2616
1da177e4 2617 /*
89bddce5 2618 * Initialize skbuff SLAB cache
1da177e4
LT
2619 */
2620 skb_init();
1da177e4
LT
2621
2622 /*
89bddce5 2623 * Initialize the protocols module.
1da177e4
LT
2624 */
2625
2626 init_inodecache();
b3e19d92
NP
2627
2628 err = register_filesystem(&sock_fs_type);
2629 if (err)
2630 goto out_fs;
1da177e4 2631 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2632 if (IS_ERR(sock_mnt)) {
2633 err = PTR_ERR(sock_mnt);
2634 goto out_mount;
2635 }
77d76ea3
AK
2636
2637 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2638 */
2639
2640#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2641 err = netfilter_init();
2642 if (err)
2643 goto out;
1da177e4 2644#endif
cbeb321a 2645
408eccce 2646 ptp_classifier_init();
c1f19b51 2647
b3e19d92
NP
2648out:
2649 return err;
2650
2651out_mount:
2652 unregister_filesystem(&sock_fs_type);
2653out_fs:
2654 goto out;
1da177e4
LT
2655}
2656
77d76ea3
AK
2657core_initcall(sock_init); /* early initcall */
2658
1da177e4
LT
2659#ifdef CONFIG_PROC_FS
2660void socket_seq_show(struct seq_file *seq)
2661{
2662 int cpu;
2663 int counter = 0;
2664
6f912042 2665 for_each_possible_cpu(cpu)
89bddce5 2666 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2667
2668 /* It can be negative, by the way. 8) */
2669 if (counter < 0)
2670 counter = 0;
2671
2672 seq_printf(seq, "sockets: used %d\n", counter);
2673}
89bddce5 2674#endif /* CONFIG_PROC_FS */
1da177e4 2675
89bbfc95 2676#ifdef CONFIG_COMPAT
6b96018b 2677static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2678 unsigned int cmd, void __user *up)
7a229387 2679{
7a229387
AB
2680 mm_segment_t old_fs = get_fs();
2681 struct timeval ktv;
2682 int err;
2683
2684 set_fs(KERNEL_DS);
6b96018b 2685 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2686 set_fs(old_fs);
644595f8 2687 if (!err)
ed6fe9d6 2688 err = compat_put_timeval(&ktv, up);
644595f8 2689
7a229387
AB
2690 return err;
2691}
2692
6b96018b 2693static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2694 unsigned int cmd, void __user *up)
7a229387 2695{
7a229387
AB
2696 mm_segment_t old_fs = get_fs();
2697 struct timespec kts;
2698 int err;
2699
2700 set_fs(KERNEL_DS);
6b96018b 2701 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2702 set_fs(old_fs);
644595f8 2703 if (!err)
ed6fe9d6 2704 err = compat_put_timespec(&kts, up);
644595f8 2705
7a229387
AB
2706 return err;
2707}
2708
6b96018b 2709static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2710{
2711 struct ifreq __user *uifr;
2712 int err;
2713
2714 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2715 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2716 return -EFAULT;
2717
6b96018b 2718 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2719 if (err)
2720 return err;
2721
6b96018b 2722 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2723 return -EFAULT;
2724
2725 return 0;
2726}
2727
6b96018b 2728static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2729{
6b96018b 2730 struct compat_ifconf ifc32;
7a229387
AB
2731 struct ifconf ifc;
2732 struct ifconf __user *uifc;
6b96018b 2733 struct compat_ifreq __user *ifr32;
7a229387
AB
2734 struct ifreq __user *ifr;
2735 unsigned int i, j;
2736 int err;
2737
6b96018b 2738 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2739 return -EFAULT;
2740
43da5f2e 2741 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2742 if (ifc32.ifcbuf == 0) {
2743 ifc32.ifc_len = 0;
2744 ifc.ifc_len = 0;
2745 ifc.ifc_req = NULL;
2746 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2747 } else {
c6d409cf
ED
2748 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2749 sizeof(struct ifreq);
7a229387
AB
2750 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2751 ifc.ifc_len = len;
2752 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2753 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2754 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2755 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2756 return -EFAULT;
2757 ifr++;
2758 ifr32++;
2759 }
2760 }
2761 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2762 return -EFAULT;
2763
6b96018b 2764 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2765 if (err)
2766 return err;
2767
2768 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2769 return -EFAULT;
2770
2771 ifr = ifc.ifc_req;
2772 ifr32 = compat_ptr(ifc32.ifcbuf);
2773 for (i = 0, j = 0;
c6d409cf
ED
2774 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2775 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2776 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2777 return -EFAULT;
2778 ifr32++;
2779 ifr++;
2780 }
2781
2782 if (ifc32.ifcbuf == 0) {
2783 /* Translate from 64-bit structure multiple to
2784 * a 32-bit one.
2785 */
2786 i = ifc.ifc_len;
6b96018b 2787 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2788 ifc32.ifc_len = i;
2789 } else {
2790 ifc32.ifc_len = i;
2791 }
6b96018b 2792 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2793 return -EFAULT;
2794
2795 return 0;
2796}
2797
6b96018b 2798static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2799{
3a7da39d
BH
2800 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2801 bool convert_in = false, convert_out = false;
2802 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2803 struct ethtool_rxnfc __user *rxnfc;
7a229387 2804 struct ifreq __user *ifr;
3a7da39d
BH
2805 u32 rule_cnt = 0, actual_rule_cnt;
2806 u32 ethcmd;
7a229387 2807 u32 data;
3a7da39d 2808 int ret;
7a229387 2809
3a7da39d
BH
2810 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2811 return -EFAULT;
7a229387 2812
3a7da39d
BH
2813 compat_rxnfc = compat_ptr(data);
2814
2815 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2816 return -EFAULT;
2817
3a7da39d
BH
2818 /* Most ethtool structures are defined without padding.
2819 * Unfortunately struct ethtool_rxnfc is an exception.
2820 */
2821 switch (ethcmd) {
2822 default:
2823 break;
2824 case ETHTOOL_GRXCLSRLALL:
2825 /* Buffer size is variable */
2826 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2827 return -EFAULT;
2828 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2829 return -ENOMEM;
2830 buf_size += rule_cnt * sizeof(u32);
2831 /* fall through */
2832 case ETHTOOL_GRXRINGS:
2833 case ETHTOOL_GRXCLSRLCNT:
2834 case ETHTOOL_GRXCLSRULE:
55664f32 2835 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2836 convert_out = true;
2837 /* fall through */
2838 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2839 buf_size += sizeof(struct ethtool_rxnfc);
2840 convert_in = true;
2841 break;
2842 }
2843
2844 ifr = compat_alloc_user_space(buf_size);
954b1244 2845 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2846
2847 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2848 return -EFAULT;
2849
3a7da39d
BH
2850 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2851 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2852 return -EFAULT;
2853
3a7da39d 2854 if (convert_in) {
127fe533 2855 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2856 * fs.ring_cookie and at the end of fs, but nowhere else.
2857 */
127fe533
AD
2858 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2859 sizeof(compat_rxnfc->fs.m_ext) !=
2860 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2861 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2862 BUILD_BUG_ON(
2863 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2864 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2865 offsetof(struct ethtool_rxnfc, fs.location) -
2866 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2867
2868 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2869 (void __user *)(&rxnfc->fs.m_ext + 1) -
2870 (void __user *)rxnfc) ||
3a7da39d
BH
2871 copy_in_user(&rxnfc->fs.ring_cookie,
2872 &compat_rxnfc->fs.ring_cookie,
954b1244 2873 (void __user *)(&rxnfc->fs.location + 1) -
7d584568
WW
2874 (void __user *)&rxnfc->fs.ring_cookie))
2875 return -EFAULT;
2876 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2877 if (put_user(rule_cnt, &rxnfc->rule_cnt))
2878 return -EFAULT;
2879 } else if (copy_in_user(&rxnfc->rule_cnt,
2880 &compat_rxnfc->rule_cnt,
2881 sizeof(rxnfc->rule_cnt)))
3a7da39d
BH
2882 return -EFAULT;
2883 }
2884
2885 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2886 if (ret)
2887 return ret;
2888
2889 if (convert_out) {
2890 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2891 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2892 (const void __user *)rxnfc) ||
3a7da39d
BH
2893 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2894 &rxnfc->fs.ring_cookie,
954b1244
SH
2895 (const void __user *)(&rxnfc->fs.location + 1) -
2896 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2897 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2898 sizeof(rxnfc->rule_cnt)))
2899 return -EFAULT;
2900
2901 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2902 /* As an optimisation, we only copy the actual
2903 * number of rules that the underlying
2904 * function returned. Since Mallory might
2905 * change the rule count in user memory, we
2906 * check that it is less than the rule count
2907 * originally given (as the user buffer size),
2908 * which has been range-checked.
2909 */
2910 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2911 return -EFAULT;
2912 if (actual_rule_cnt < rule_cnt)
2913 rule_cnt = actual_rule_cnt;
2914 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2915 &rxnfc->rule_locs[0],
2916 rule_cnt * sizeof(u32)))
2917 return -EFAULT;
2918 }
2919 }
2920
2921 return 0;
7a229387
AB
2922}
2923
7a50a240
AB
2924static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2925{
2926 void __user *uptr;
2927 compat_uptr_t uptr32;
2928 struct ifreq __user *uifr;
2929
c6d409cf 2930 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2931 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2932 return -EFAULT;
2933
2934 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2935 return -EFAULT;
2936
2937 uptr = compat_ptr(uptr32);
2938
2939 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2940 return -EFAULT;
2941
2942 return dev_ioctl(net, SIOCWANDEV, uifr);
2943}
2944
6b96018b
AB
2945static int bond_ioctl(struct net *net, unsigned int cmd,
2946 struct compat_ifreq __user *ifr32)
7a229387
AB
2947{
2948 struct ifreq kifr;
7a229387
AB
2949 mm_segment_t old_fs;
2950 int err;
7a229387
AB
2951
2952 switch (cmd) {
2953 case SIOCBONDENSLAVE:
2954 case SIOCBONDRELEASE:
2955 case SIOCBONDSETHWADDR:
2956 case SIOCBONDCHANGEACTIVE:
6b96018b 2957 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2958 return -EFAULT;
2959
2960 old_fs = get_fs();
c6d409cf 2961 set_fs(KERNEL_DS);
c3f52ae6 2962 err = dev_ioctl(net, cmd,
2963 (struct ifreq __user __force *) &kifr);
c6d409cf 2964 set_fs(old_fs);
7a229387
AB
2965
2966 return err;
7a229387 2967 default:
07d106d0 2968 return -ENOIOCTLCMD;
ccbd6a5a 2969 }
7a229387
AB
2970}
2971
590d4693
BH
2972/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2973static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 2974 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2975{
2976 struct ifreq __user *u_ifreq64;
7a229387
AB
2977 char tmp_buf[IFNAMSIZ];
2978 void __user *data64;
2979 u32 data32;
2980
2981 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2982 IFNAMSIZ))
2983 return -EFAULT;
417c3522 2984 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
2985 return -EFAULT;
2986 data64 = compat_ptr(data32);
2987
2988 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2989
7a229387
AB
2990 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2991 IFNAMSIZ))
2992 return -EFAULT;
417c3522 2993 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
2994 return -EFAULT;
2995
6b96018b 2996 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2997}
2998
6b96018b
AB
2999static int dev_ifsioc(struct net *net, struct socket *sock,
3000 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 3001{
a2116ed2 3002 struct ifreq __user *uifr;
7a229387
AB
3003 int err;
3004
a2116ed2
AB
3005 uifr = compat_alloc_user_space(sizeof(*uifr));
3006 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3007 return -EFAULT;
3008
3009 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3010
7a229387
AB
3011 if (!err) {
3012 switch (cmd) {
3013 case SIOCGIFFLAGS:
3014 case SIOCGIFMETRIC:
3015 case SIOCGIFMTU:
3016 case SIOCGIFMEM:
3017 case SIOCGIFHWADDR:
3018 case SIOCGIFINDEX:
3019 case SIOCGIFADDR:
3020 case SIOCGIFBRDADDR:
3021 case SIOCGIFDSTADDR:
3022 case SIOCGIFNETMASK:
fab2532b 3023 case SIOCGIFPFLAGS:
7a229387 3024 case SIOCGIFTXQLEN:
fab2532b
AB
3025 case SIOCGMIIPHY:
3026 case SIOCGMIIREG:
a2116ed2 3027 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
3028 err = -EFAULT;
3029 break;
3030 }
3031 }
3032 return err;
3033}
3034
a2116ed2
AB
3035static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3036 struct compat_ifreq __user *uifr32)
3037{
3038 struct ifreq ifr;
3039 struct compat_ifmap __user *uifmap32;
3040 mm_segment_t old_fs;
3041 int err;
3042
3043 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3044 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3045 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3046 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3047 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3048 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3049 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3050 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3051 if (err)
3052 return -EFAULT;
3053
3054 old_fs = get_fs();
c6d409cf 3055 set_fs(KERNEL_DS);
c3f52ae6 3056 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 3057 set_fs(old_fs);
a2116ed2
AB
3058
3059 if (cmd == SIOCGIFMAP && !err) {
3060 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3061 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3062 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3063 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3064 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3065 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3066 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3067 if (err)
3068 err = -EFAULT;
3069 }
3070 return err;
3071}
3072
7a229387 3073struct rtentry32 {
c6d409cf 3074 u32 rt_pad1;
7a229387
AB
3075 struct sockaddr rt_dst; /* target address */
3076 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3077 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3078 unsigned short rt_flags;
3079 short rt_pad2;
3080 u32 rt_pad3;
3081 unsigned char rt_tos;
3082 unsigned char rt_class;
3083 short rt_pad4;
3084 short rt_metric; /* +1 for binary compatibility! */
7a229387 3085 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3086 u32 rt_mtu; /* per route MTU/Window */
3087 u32 rt_window; /* Window clamping */
7a229387
AB
3088 unsigned short rt_irtt; /* Initial RTT */
3089};
3090
3091struct in6_rtmsg32 {
3092 struct in6_addr rtmsg_dst;
3093 struct in6_addr rtmsg_src;
3094 struct in6_addr rtmsg_gateway;
3095 u32 rtmsg_type;
3096 u16 rtmsg_dst_len;
3097 u16 rtmsg_src_len;
3098 u32 rtmsg_metric;
3099 u32 rtmsg_info;
3100 u32 rtmsg_flags;
3101 s32 rtmsg_ifindex;
3102};
3103
6b96018b
AB
3104static int routing_ioctl(struct net *net, struct socket *sock,
3105 unsigned int cmd, void __user *argp)
7a229387
AB
3106{
3107 int ret;
3108 void *r = NULL;
3109 struct in6_rtmsg r6;
3110 struct rtentry r4;
3111 char devname[16];
3112 u32 rtdev;
3113 mm_segment_t old_fs = get_fs();
3114
6b96018b
AB
3115 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3116 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3117 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3118 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3119 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3120 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3121 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3122 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3123 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3124 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3125 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3126
3127 r = (void *) &r6;
3128 } else { /* ipv4 */
6b96018b 3129 struct rtentry32 __user *ur4 = argp;
c6d409cf 3130 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3131 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3132 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3133 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3134 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3135 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3136 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3137 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3138 if (rtdev) {
c6d409cf 3139 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3140 r4.rt_dev = (char __user __force *)devname;
3141 devname[15] = 0;
7a229387
AB
3142 } else
3143 r4.rt_dev = NULL;
3144
3145 r = (void *) &r4;
3146 }
3147
3148 if (ret) {
3149 ret = -EFAULT;
3150 goto out;
3151 }
3152
c6d409cf 3153 set_fs(KERNEL_DS);
6b96018b 3154 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3155 set_fs(old_fs);
7a229387
AB
3156
3157out:
7a229387
AB
3158 return ret;
3159}
3160
3161/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3162 * for some operations; this forces use of the newer bridge-utils that
25985edc 3163 * use compatible ioctls
7a229387 3164 */
6b96018b 3165static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3166{
6b96018b 3167 compat_ulong_t tmp;
7a229387 3168
6b96018b 3169 if (get_user(tmp, argp))
7a229387
AB
3170 return -EFAULT;
3171 if (tmp == BRCTL_GET_VERSION)
3172 return BRCTL_VERSION + 1;
3173 return -EINVAL;
3174}
3175
6b96018b
AB
3176static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3177 unsigned int cmd, unsigned long arg)
3178{
3179 void __user *argp = compat_ptr(arg);
3180 struct sock *sk = sock->sk;
3181 struct net *net = sock_net(sk);
7a229387 3182
6b96018b 3183 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3184 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3185
3186 switch (cmd) {
3187 case SIOCSIFBR:
3188 case SIOCGIFBR:
3189 return old_bridge_ioctl(argp);
3190 case SIOCGIFNAME:
3191 return dev_ifname32(net, argp);
3192 case SIOCGIFCONF:
3193 return dev_ifconf(net, argp);
3194 case SIOCETHTOOL:
3195 return ethtool_ioctl(net, argp);
7a50a240
AB
3196 case SIOCWANDEV:
3197 return compat_siocwandev(net, argp);
a2116ed2
AB
3198 case SIOCGIFMAP:
3199 case SIOCSIFMAP:
3200 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3201 case SIOCBONDENSLAVE:
3202 case SIOCBONDRELEASE:
3203 case SIOCBONDSETHWADDR:
6b96018b
AB
3204 case SIOCBONDCHANGEACTIVE:
3205 return bond_ioctl(net, cmd, argp);
3206 case SIOCADDRT:
3207 case SIOCDELRT:
3208 return routing_ioctl(net, sock, cmd, argp);
3209 case SIOCGSTAMP:
3210 return do_siocgstamp(net, sock, cmd, argp);
3211 case SIOCGSTAMPNS:
3212 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3213 case SIOCBONDSLAVEINFOQUERY:
3214 case SIOCBONDINFOQUERY:
a2116ed2 3215 case SIOCSHWTSTAMP:
fd468c74 3216 case SIOCGHWTSTAMP:
590d4693 3217 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3218
3219 case FIOSETOWN:
3220 case SIOCSPGRP:
3221 case FIOGETOWN:
3222 case SIOCGPGRP:
3223 case SIOCBRADDBR:
3224 case SIOCBRDELBR:
3225 case SIOCGIFVLAN:
3226 case SIOCSIFVLAN:
3227 case SIOCADDDLCI:
3228 case SIOCDELDLCI:
c62cce2c 3229 case SIOCGSKNS:
6b96018b
AB
3230 return sock_ioctl(file, cmd, arg);
3231
3232 case SIOCGIFFLAGS:
3233 case SIOCSIFFLAGS:
3234 case SIOCGIFMETRIC:
3235 case SIOCSIFMETRIC:
3236 case SIOCGIFMTU:
3237 case SIOCSIFMTU:
3238 case SIOCGIFMEM:
3239 case SIOCSIFMEM:
3240 case SIOCGIFHWADDR:
3241 case SIOCSIFHWADDR:
3242 case SIOCADDMULTI:
3243 case SIOCDELMULTI:
3244 case SIOCGIFINDEX:
6b96018b
AB
3245 case SIOCGIFADDR:
3246 case SIOCSIFADDR:
3247 case SIOCSIFHWBROADCAST:
6b96018b 3248 case SIOCDIFADDR:
6b96018b
AB
3249 case SIOCGIFBRDADDR:
3250 case SIOCSIFBRDADDR:
3251 case SIOCGIFDSTADDR:
3252 case SIOCSIFDSTADDR:
3253 case SIOCGIFNETMASK:
3254 case SIOCSIFNETMASK:
3255 case SIOCSIFPFLAGS:
3256 case SIOCGIFPFLAGS:
3257 case SIOCGIFTXQLEN:
3258 case SIOCSIFTXQLEN:
3259 case SIOCBRADDIF:
3260 case SIOCBRDELIF:
9177efd3
AB
3261 case SIOCSIFNAME:
3262 case SIOCGMIIPHY:
3263 case SIOCGMIIREG:
3264 case SIOCSMIIREG:
6b96018b 3265 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3266
6b96018b
AB
3267 case SIOCSARP:
3268 case SIOCGARP:
3269 case SIOCDARP:
2e3f1f15 3270 case SIOCOUTQNSD:
6b96018b 3271 case SIOCATMARK:
9177efd3
AB
3272 return sock_do_ioctl(net, sock, cmd, arg);
3273 }
3274
6b96018b
AB
3275 return -ENOIOCTLCMD;
3276}
7a229387 3277
95c96174 3278static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3279 unsigned long arg)
89bbfc95
SP
3280{
3281 struct socket *sock = file->private_data;
3282 int ret = -ENOIOCTLCMD;
87de87d5
DM
3283 struct sock *sk;
3284 struct net *net;
3285
3286 sk = sock->sk;
3287 net = sock_net(sk);
89bbfc95
SP
3288
3289 if (sock->ops->compat_ioctl)
3290 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3291
87de87d5
DM
3292 if (ret == -ENOIOCTLCMD &&
3293 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3294 ret = compat_wext_handle_ioctl(net, cmd, arg);
3295
6b96018b
AB
3296 if (ret == -ENOIOCTLCMD)
3297 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3298
89bbfc95
SP
3299 return ret;
3300}
3301#endif
3302
ac5a488e
SS
3303int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3304{
3305 return sock->ops->bind(sock, addr, addrlen);
3306}
c6d409cf 3307EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3308
3309int kernel_listen(struct socket *sock, int backlog)
3310{
3311 return sock->ops->listen(sock, backlog);
3312}
c6d409cf 3313EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3314
3315int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3316{
3317 struct sock *sk = sock->sk;
3318 int err;
3319
3320 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3321 newsock);
3322 if (err < 0)
3323 goto done;
3324
cdfbabfb 3325 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3326 if (err < 0) {
3327 sock_release(*newsock);
fa8705b0 3328 *newsock = NULL;
ac5a488e
SS
3329 goto done;
3330 }
3331
3332 (*newsock)->ops = sock->ops;
1b08534e 3333 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3334
3335done:
3336 return err;
3337}
c6d409cf 3338EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3339
3340int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3341 int flags)
ac5a488e
SS
3342{
3343 return sock->ops->connect(sock, addr, addrlen, flags);
3344}
c6d409cf 3345EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3346
3347int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3348 int *addrlen)
3349{
3350 return sock->ops->getname(sock, addr, addrlen, 0);
3351}
c6d409cf 3352EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3353
3354int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3355 int *addrlen)
3356{
3357 return sock->ops->getname(sock, addr, addrlen, 1);
3358}
c6d409cf 3359EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3360
3361int kernel_getsockopt(struct socket *sock, int level, int optname,
3362 char *optval, int *optlen)
3363{
3364 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3365 char __user *uoptval;
3366 int __user *uoptlen;
ac5a488e
SS
3367 int err;
3368
fb8621bb
NK
3369 uoptval = (char __user __force *) optval;
3370 uoptlen = (int __user __force *) optlen;
3371
ac5a488e
SS
3372 set_fs(KERNEL_DS);
3373 if (level == SOL_SOCKET)
fb8621bb 3374 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3375 else
fb8621bb
NK
3376 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3377 uoptlen);
ac5a488e
SS
3378 set_fs(oldfs);
3379 return err;
3380}
c6d409cf 3381EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3382
3383int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3384 char *optval, unsigned int optlen)
ac5a488e
SS
3385{
3386 mm_segment_t oldfs = get_fs();
fb8621bb 3387 char __user *uoptval;
ac5a488e
SS
3388 int err;
3389
fb8621bb
NK
3390 uoptval = (char __user __force *) optval;
3391
ac5a488e
SS
3392 set_fs(KERNEL_DS);
3393 if (level == SOL_SOCKET)
fb8621bb 3394 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3395 else
fb8621bb 3396 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3397 optlen);
3398 set_fs(oldfs);
3399 return err;
3400}
c6d409cf 3401EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3402
3403int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3404 size_t size, int flags)
3405{
3406 if (sock->ops->sendpage)
3407 return sock->ops->sendpage(sock, page, offset, size, flags);
3408
3409 return sock_no_sendpage(sock, page, offset, size, flags);
3410}
c6d409cf 3411EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3412
306b13eb
TH
3413int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3414 size_t size, int flags)
3415{
3416 struct socket *sock = sk->sk_socket;
3417
3418 if (sock->ops->sendpage_locked)
3419 return sock->ops->sendpage_locked(sk, page, offset, size,
3420 flags);
3421
3422 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3423}
3424EXPORT_SYMBOL(kernel_sendpage_locked);
3425
ac5a488e
SS
3426int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3427{
3428 mm_segment_t oldfs = get_fs();
3429 int err;
3430
3431 set_fs(KERNEL_DS);
3432 err = sock->ops->ioctl(sock, cmd, arg);
3433 set_fs(oldfs);
3434
3435 return err;
3436}
c6d409cf 3437EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3438
91cf45f0
TM
3439int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3440{
3441 return sock->ops->shutdown(sock, how);
3442}
91cf45f0 3443EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075
P
3444
3445/* This routine returns the IP overhead imposed by a socket i.e.
3446 * the length of the underlying IP header, depending on whether
3447 * this is an IPv4 or IPv6 socket and the length from IP options turned
57240d00 3448 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075
P
3449 */
3450u32 kernel_sock_ip_overhead(struct sock *sk)
3451{
3452 struct inet_sock *inet;
3453 struct ip_options_rcu *opt;
3454 u32 overhead = 0;
113c3075
P
3455#if IS_ENABLED(CONFIG_IPV6)
3456 struct ipv6_pinfo *np;
3457 struct ipv6_txoptions *optv6 = NULL;
3458#endif /* IS_ENABLED(CONFIG_IPV6) */
3459
3460 if (!sk)
3461 return overhead;
3462
113c3075
P
3463 switch (sk->sk_family) {
3464 case AF_INET:
3465 inet = inet_sk(sk);
3466 overhead += sizeof(struct iphdr);
3467 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3468 sock_owned_by_user(sk));
113c3075
P
3469 if (opt)
3470 overhead += opt->opt.optlen;
3471 return overhead;
3472#if IS_ENABLED(CONFIG_IPV6)
3473 case AF_INET6:
3474 np = inet6_sk(sk);
3475 overhead += sizeof(struct ipv6hdr);
3476 if (np)
3477 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3478 sock_owned_by_user(sk));
113c3075
P
3479 if (optv6)
3480 overhead += (optv6->opt_flen + optv6->opt_nflen);
3481 return overhead;
3482#endif /* IS_ENABLED(CONFIG_IPV6) */
3483 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3484 return overhead;
3485 }
3486}
3487EXPORT_SYMBOL(kernel_sock_ip_overhead);